Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0 OR MIT
0002 /*
0003  * Copyright 2014-2022 Advanced Micro Devices, Inc.
0004  *
0005  * Permission is hereby granted, free of charge, to any person obtaining a
0006  * copy of this software and associated documentation files (the "Software"),
0007  * to deal in the Software without restriction, including without limitation
0008  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
0009  * and/or sell copies of the Software, and to permit persons to whom the
0010  * Software is furnished to do so, subject to the following conditions:
0011  *
0012  * The above copyright notice and this permission notice shall be included in
0013  * all copies or substantial portions of the Software.
0014  *
0015  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0016  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0017  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
0018  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
0019  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
0020  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
0021  * OTHER DEALINGS IN THE SOFTWARE.
0022  *
0023  */
0024 
0025 #include "kfd_device_queue_manager.h"
0026 #include "gca/gfx_8_0_enum.h"
0027 #include "gca/gfx_8_0_sh_mask.h"
0028 #include "oss/oss_3_0_sh_mask.h"
0029 
0030 static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
0031                    struct qcm_process_device *qpd,
0032                    enum cache_policy default_policy,
0033                    enum cache_policy alternate_policy,
0034                    void __user *alternate_aperture_base,
0035                    uint64_t alternate_aperture_size);
0036 static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
0037             struct qcm_process_device *qpd,
0038             enum cache_policy default_policy,
0039             enum cache_policy alternate_policy,
0040             void __user *alternate_aperture_base,
0041             uint64_t alternate_aperture_size);
0042 static int update_qpd_vi(struct device_queue_manager *dqm,
0043                     struct qcm_process_device *qpd);
0044 static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
0045             struct qcm_process_device *qpd);
0046 static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
0047                 struct qcm_process_device *qpd);
0048 static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
0049             struct queue *q,
0050             struct qcm_process_device *qpd);
0051 
0052 void device_queue_manager_init_vi(
0053         struct device_queue_manager_asic_ops *asic_ops)
0054 {
0055     asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;
0056     asic_ops->update_qpd = update_qpd_vi;
0057     asic_ops->init_sdma_vm = init_sdma_vm;
0058     asic_ops->mqd_manager_init = mqd_manager_init_vi;
0059 }
0060 
0061 void device_queue_manager_init_vi_tonga(
0062         struct device_queue_manager_asic_ops *asic_ops)
0063 {
0064     asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga;
0065     asic_ops->update_qpd = update_qpd_vi_tonga;
0066     asic_ops->init_sdma_vm = init_sdma_vm_tonga;
0067     asic_ops->mqd_manager_init = mqd_manager_init_vi_tonga;
0068 }
0069 
0070 static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
0071 {
0072     /* In 64-bit mode, we can only control the top 3 bits of the LDS,
0073      * scratch and GPUVM apertures.
0074      * The hardware fills in the remaining 59 bits according to the
0075      * following pattern:
0076      * LDS:     X0000000'00000000 - X0000001'00000000 (4GB)
0077      * Scratch: X0000001'00000000 - X0000002'00000000 (4GB)
0078      * GPUVM:   Y0010000'00000000 - Y0020000'00000000 (1TB)
0079      *
0080      * (where X/Y is the configurable nybble with the low-bit 0)
0081      *
0082      * LDS and scratch will have the same top nybble programmed in the
0083      * top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
0084      * GPUVM can have a different top nybble programmed in the
0085      * top 3 bits of SH_MEM_BASES.SHARED_BASE.
0086      * We don't bother to support different top nybbles
0087      * for LDS/Scratch and GPUVM.
0088      */
0089 
0090     WARN_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
0091         top_address_nybble == 0);
0092 
0093     return top_address_nybble << 12 |
0094             (top_address_nybble << 12) <<
0095             SH_MEM_BASES__SHARED_BASE__SHIFT;
0096 }
0097 
0098 static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
0099                    struct qcm_process_device *qpd,
0100                    enum cache_policy default_policy,
0101                    enum cache_policy alternate_policy,
0102                    void __user *alternate_aperture_base,
0103                    uint64_t alternate_aperture_size)
0104 {
0105     uint32_t default_mtype;
0106     uint32_t ape1_mtype;
0107 
0108     default_mtype = (default_policy == cache_policy_coherent) ?
0109             MTYPE_CC :
0110             MTYPE_NC;
0111 
0112     ape1_mtype = (alternate_policy == cache_policy_coherent) ?
0113             MTYPE_CC :
0114             MTYPE_NC;
0115 
0116     qpd->sh_mem_config = (qpd->sh_mem_config &
0117             SH_MEM_CONFIG__ADDRESS_MODE_MASK) |
0118         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
0119                 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
0120         default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
0121         ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
0122         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
0123 
0124     return true;
0125 }
0126 
0127 static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
0128         struct qcm_process_device *qpd,
0129         enum cache_policy default_policy,
0130         enum cache_policy alternate_policy,
0131         void __user *alternate_aperture_base,
0132         uint64_t alternate_aperture_size)
0133 {
0134     uint32_t default_mtype;
0135     uint32_t ape1_mtype;
0136 
0137     default_mtype = (default_policy == cache_policy_coherent) ?
0138             MTYPE_UC :
0139             MTYPE_NC;
0140 
0141     ape1_mtype = (alternate_policy == cache_policy_coherent) ?
0142             MTYPE_UC :
0143             MTYPE_NC;
0144 
0145     qpd->sh_mem_config =
0146             SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
0147                    SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
0148             default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
0149             ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
0150 
0151     return true;
0152 }
0153 
/*
 * Refresh the per-process SH_MEM_* shadow registers for GFX8 APUs.
 *
 * On first call (sh_mem_config still zero) the config register is
 * seeded with unaligned-access mode, cache-coherent MTYPEs and the
 * PRIVATE_ATC bit, and the APE1 aperture is cleared. On every call the
 * aperture bases and the ADDRESS_MODE field are (re)derived from the
 * process's 32/64-bit mode. Always returns 0.
 */
static int update_qpd_vi(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct kfd_process_device *pdd;
	unsigned int temp;

	pdd = qpd_to_pdd(qpd);

	/* check if sh_mem_config register already configured */
	if (qpd->sh_mem_config == 0) {
		qpd->sh_mem_config =
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
				SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

		/* APE1 disabled until set_cache_memory_policy configures it */
		qpd->sh_mem_ape1_limit = 0;
		qpd->sh_mem_ape1_base = 0;
	}

	if (qpd->pqm->process->is_32bit_user_mode) {
		/* 32-bit process: bases value goes straight into SHARED_BASE */
		temp = get_sh_mem_bases_32(pdd);
		qpd->sh_mem_bases = temp << SH_MEM_BASES__SHARED_BASE__SHIFT;
		qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA32 <<
					SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
	} else {
		/* 64-bit process: only the top nybble is programmable */
		temp = get_sh_mem_bases_nybble_64(pdd);
		qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
		qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
		/* ensure PRIVATE_ATC stays set even on repeat calls, since
		 * set_cache_memory_policy may have rewritten sh_mem_config
		 */
		qpd->sh_mem_config |= 1  <<
			SH_MEM_CONFIG__PRIVATE_ATC__SHIFT;
	}

	pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
		qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);

	return 0;
}
0194 
0195 static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
0196             struct qcm_process_device *qpd)
0197 {
0198     struct kfd_process_device *pdd;
0199     unsigned int temp;
0200 
0201     pdd = qpd_to_pdd(qpd);
0202 
0203     /* check if sh_mem_config register already configured */
0204     if (qpd->sh_mem_config == 0) {
0205         qpd->sh_mem_config =
0206                 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
0207                     SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
0208                 MTYPE_UC <<
0209                     SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
0210                 MTYPE_UC <<
0211                     SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
0212 
0213         qpd->sh_mem_ape1_limit = 0;
0214         qpd->sh_mem_ape1_base = 0;
0215     }
0216 
0217     /* On dGPU we're always in GPUVM64 addressing mode with 64-bit
0218      * aperture addresses.
0219      */
0220     temp = get_sh_mem_bases_nybble_64(pdd);
0221     qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
0222 
0223     pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n",
0224         temp, qpd->sh_mem_bases);
0225 
0226     return 0;
0227 }
0228 
0229 static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
0230                 struct qcm_process_device *qpd)
0231 {
0232     uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);
0233 
0234     if (q->process->is_32bit_user_mode)
0235         value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
0236                 get_sh_mem_bases_32(qpd_to_pdd(qpd));
0237     else
0238         value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
0239                 SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
0240                 SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
0241 
0242     q->properties.sdma_vm_addr = value;
0243 }
0244 
0245 static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
0246             struct queue *q,
0247             struct qcm_process_device *qpd)
0248 {
0249     /* On dGPU we're always in GPUVM64 addressing mode with 64-bit
0250      * aperture addresses.
0251      */
0252     q->properties.sdma_vm_addr =
0253         ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
0254          SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
0255         SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
0256 }