// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include "gen2_engine_cs.h"
#include "i915_drv.h"
#include "i915_reg.h"
#include "intel_engine.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_ring.h"

int gen2_emit_flush(struct i915_request *rq, u32 mode)
{
    unsigned int num_store_dw = 12;
    u32 cmd, *cs;

    cmd = MI_FLUSH;
    if (mode & EMIT_INVALIDATE)
        cmd |= MI_READ_FLUSH;

    cs = intel_ring_begin(rq, 2 + 4 * num_store_dw);
    if (IS_ERR(cs))
        return PTR_ERR(cs);

    *cs++ = cmd;
    while (num_store_dw--) {
        *cs++ = MI_STORE_DWORD_INDEX;
        *cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
        *cs++ = 0;
        *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
    }
    *cs++ = cmd;

    intel_ring_advance(rq, cs);

    return 0;
}

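/*
 * Flush for the gen4/gen5 render command streamer. The cache and TLB
 * behaviour this relies on is summarised in the comment below; on
 * EMIT_INVALIDATE the flush is bracketed by PIPE_CONTROL scratch writes and
 * a burst of MI_FLUSHes that serve as a delay (see the second comment).
 */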
int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
    u32 cmd, *cs;
    int i;

    /*
     * read/write caches:
     *
     * I915_GEM_DOMAIN_RENDER is always invalidated, but is
     * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
     * also flushed at 2d versus 3d pipeline switches.
     *
     * read-only caches:
     *
     * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
     * MI_READ_FLUSH is set, and is always flushed on 965.
     *
     * I915_GEM_DOMAIN_COMMAND may not exist?
     *
     * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
     * invalidated when MI_EXE_FLUSH is set.
     *
     * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
     * invalidated with every MI_FLUSH.
     *
     * TLBs:
     *
     * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
     * and I915_GEM_DOMAIN_CPU are invalidated at PTE write, and
     * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
     * are flushed at any MI_FLUSH.
     */

    cmd = MI_FLUSH;
    if (mode & EMIT_INVALIDATE) {
        cmd |= MI_EXE_FLUSH;
        if (IS_G4X(rq->engine->i915) || GRAPHICS_VER(rq->engine->i915) == 5)
            cmd |= MI_INVALIDATE_ISP;
    }

    i = 2;
    if (mode & EMIT_INVALIDATE)
        i += 20;

    cs = intel_ring_begin(rq, i);
    if (IS_ERR(cs))
        return PTR_ERR(cs);

    *cs++ = cmd;

    /*
     * A random delay to let the CS invalidate take effect? Without this
     * delay, the GPU relocation path fails as the CS does not see
     * the updated contents. Just as important, if we apply the flushes
     * to the EMIT_FLUSH branch (i.e. immediately after the relocation
     * write and before the invalidate on the next batch), the relocations
     * still fail. This implies that it is a delay following invalidation
     * that is required to reset the caches, as opposed to a delay to
     * ensure the memory is written.
     */
    if (mode & EMIT_INVALIDATE) {
        *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
        *cs++ = intel_gt_scratch_offset(rq->engine->gt,
                                        INTEL_GT_SCRATCH_FIELD_DEFAULT) |
            PIPE_CONTROL_GLOBAL_GTT;
        *cs++ = 0;
        *cs++ = 0;

        for (i = 0; i < 12; i++)
            *cs++ = MI_FLUSH;

        *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
        *cs++ = intel_gt_scratch_offset(rq->engine->gt,
                                        INTEL_GT_SCRATCH_FIELD_DEFAULT) |
            PIPE_CONTROL_GLOBAL_GTT;
        *cs++ = 0;
        *cs++ = 0;
    }

    *cs++ = cmd;

    intel_ring_advance(rq, cs);

    return 0;
}

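/*
 * Flush for the gen4/gen5 video (BSD) command streamer: a bare MI_FLUSH,
 * padded with MI_NOOP so the emission stays qword aligned.
 */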
int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode)
{
    u32 *cs;

    cs = intel_ring_begin(rq, 2);
    if (IS_ERR(cs))
        return PTR_ERR(cs);

    *cs++ = MI_FLUSH;
    *cs++ = MI_NOOP;
    intel_ring_advance(rq, cs);

    return 0;
}

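/*
 * Common breadcrumb emission: an MI_FLUSH, 'flush' dummy writes into the HWS
 * scratch slot (apparently as a delay for the flush to land), 'post' copies
 * of the seqno written into its status page slot, and finally
 * MI_USER_INTERRUPT to wake any waiters.
 */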
static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs,
                                   int flush, int post)
{
    GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
    GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);

    *cs++ = MI_FLUSH;

    while (flush--) {
        *cs++ = MI_STORE_DWORD_INDEX;
        *cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
        *cs++ = rq->fence.seqno;
    }

    while (post--) {
        *cs++ = MI_STORE_DWORD_INDEX;
        *cs++ = I915_GEM_HWS_SEQNO_ADDR;
        *cs++ = rq->fence.seqno;
    }

    *cs++ = MI_USER_INTERRUPT;

    rq->tail = intel_ring_offset(rq, cs);
    assert_ring_tail_valid(rq->ring, rq->tail);

    return cs;
}

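/* gen3 and gen5 differ only in how much scratch-write padding they use. */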
u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
    return __gen2_emit_breadcrumb(rq, cs, 16, 8);
}

u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
    return __gen2_emit_breadcrumb(rq, cs, 8, 8);
}

/* Just a userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT SZ_256K
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT)
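/*
 * Batchbuffer start for i830: to avoid the CS TLB invalidation bug noted in
 * the body, unpinned batches are first blitted into a stable scratch area in
 * the GGTT and then executed from that copy.
 */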
int i830_emit_bb_start(struct i915_request *rq,
                       u64 offset, u32 len,
                       unsigned int dispatch_flags)
{
    u32 *cs, cs_offset =
        intel_gt_scratch_offset(rq->engine->gt,
                                INTEL_GT_SCRATCH_FIELD_DEFAULT);

    GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE);

    cs = intel_ring_begin(rq, 6);
    if (IS_ERR(cs))
        return PTR_ERR(cs);

    /* Evict the invalid PTE TLBs */
    *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
    *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
    *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
    *cs++ = cs_offset;
    *cs++ = 0xdeadbeef;
    *cs++ = MI_NOOP;
    intel_ring_advance(rq, cs);

    if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
        if (len > I830_BATCH_LIMIT)
            return -ENOSPC;

        cs = intel_ring_begin(rq, 6 + 2);
        if (IS_ERR(cs))
            return PTR_ERR(cs);

        /*
         * Blit the batch (which now has all relocs applied) to the
         * stable batch scratch bo area (so that the CS never
         * stumbles over its tlb invalidation bug) ...
         */
        *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
        *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
        *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
        *cs++ = cs_offset;
        *cs++ = 4096;
        *cs++ = offset;

        *cs++ = MI_FLUSH;
        *cs++ = MI_NOOP;
        intel_ring_advance(rq, cs);

        /* ... and execute it. */
        offset = cs_offset;
    }

    if (!(dispatch_flags & I915_DISPATCH_SECURE))
        offset |= MI_BATCH_NON_SECURE;

    cs = intel_ring_begin(rq, 2);
    if (IS_ERR(cs))
        return PTR_ERR(cs);

    *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
    *cs++ = offset;
    intel_ring_advance(rq, cs);

    return 0;
}

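/* Plain MI_BATCH_BUFFER_START from the GGTT for gen3. */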
int gen3_emit_bb_start(struct i915_request *rq,
                       u64 offset, u32 len,
                       unsigned int dispatch_flags)
{
    u32 *cs;

    if (!(dispatch_flags & I915_DISPATCH_SECURE))
        offset |= MI_BATCH_NON_SECURE;

    cs = intel_ring_begin(rq, 2);
    if (IS_ERR(cs))
        return PTR_ERR(cs);

    *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
    *cs++ = offset;
    intel_ring_advance(rq, cs);

    return 0;
}

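/*
 * gen4/gen5 use a different non-secure bit (MI_BATCH_NON_SECURE_I965),
 * cleared only when the caller requests a secure dispatch.
 */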
int gen4_emit_bb_start(struct i915_request *rq,
                       u64 offset, u32 length,
                       unsigned int dispatch_flags)
{
    u32 security;
    u32 *cs;

    security = MI_BATCH_NON_SECURE_I965;
    if (dispatch_flags & I915_DISPATCH_SECURE)
        security = 0;

    cs = intel_ring_begin(rq, 2);
    if (IS_ERR(cs))
        return PTR_ERR(cs);

    *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | security;
    *cs++ = offset;
    intel_ring_advance(rq, cs);

    return 0;
}

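/*
 * gen2 interrupt masking goes through the 16-bit IMR accessors; the posting
 * read on enable makes sure the unmask has reached the hardware.
 */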
void gen2_irq_enable(struct intel_engine_cs *engine)
{
    struct drm_i915_private *i915 = engine->i915;

    i915->irq_mask &= ~engine->irq_enable_mask;
    intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
    ENGINE_POSTING_READ16(engine, RING_IMR);
}

void gen2_irq_disable(struct intel_engine_cs *engine)
{
    struct drm_i915_private *i915 = engine->i915;

    i915->irq_mask |= engine->irq_enable_mask;
    intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
}

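/* Same IMR scheme as above, but with full 32-bit register accesses. */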
void gen3_irq_enable(struct intel_engine_cs *engine)
{
    engine->i915->irq_mask &= ~engine->irq_enable_mask;
    intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
    intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
}

void gen3_irq_disable(struct intel_engine_cs *engine)
{
    engine->i915->irq_mask |= engine->irq_enable_mask;
    intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
}

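/*
 * On gen5 (Ironlake) the engine interrupts are routed through the GT
 * interrupt registers, so defer to the gen5 GT irq helpers.
 */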
void gen5_irq_enable(struct intel_engine_cs *engine)
{
    gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
}

void gen5_irq_disable(struct intel_engine_cs *engine)
{
    gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
}