// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "goyaP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_0.h"
#include "../include/goya/asic_reg/goya_masks.h"
#include "../include/goya/goya_reg_map.h"

#include <linux/pci.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * GOYA security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers (When MMU is enabled, DMA RR does NOT protect host)
 *        - MMU
 *
 * 2. DRAM is protected by:
 *        - Range registers (protect the first 512MB)
 *        - MMU (isolation between users)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * When MMU is disabled:
 *
 * QMAN DMA: PQ, CQ, CP, DMA are secured.
 * PQ, CB and the data are on the host.
 *
 * QMAN TPC/MME:
 * PQ, CQ and CP are not secured.
 * PQ, CB and the data are on the SRAM/DRAM.
 *
 * Since QMAN DMA is secured, the driver parses the DMA CB:
 *     - it checks the DMA pointer
 *     - WREG, MSG_PROT are not allowed.
 *     - MSG_LONG/SHORT are allowed.
 *
 * A read/write transaction by the QMAN to a protected area will succeed if
 * and only if the QMAN's CP is secured and MSG_PROT is used.
 *
 *
 * When MMU is enabled:
 *
 * QMAN DMA: PQ, CQ and CP are secured.
 * MMU is set to bypass on the Secure props register of the QMAN.
 * The reasons we don't enable MMU for PQ, CQ and CP are:
 *     - PQ entry is in kernel address space and the driver doesn't map it.
 *     - CP writes to MSIX register and to kernel address space (completion
 *       queue).
 *
 * DMA is not secured, but because CP is secured, the driver still needs to
 * parse the CB; it just doesn't need to check the DMA addresses.
 *
 * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA
 * and the driver doesn't map memory in the MMU.
 *
 * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled
 * mode).
 *
 * DMA RR does NOT protect host because DMA is not secured.
 *
 */

#define GOYA_BOOT_FIT_FILE  "habanalabs/goya/goya-boot-fit.itb"
#define GOYA_LINUX_FW_FILE  "habanalabs/goya/goya-fit.itb"

#define GOYA_MMU_REGS_NUM       63

#define GOYA_DMA_POOL_BLK_SIZE      0x100       /* 256 bytes */

#define GOYA_RESET_TIMEOUT_MSEC     500     /* 500ms */
#define GOYA_PLDM_RESET_TIMEOUT_MSEC    20000       /* 20s */
#define GOYA_RESET_WAIT_MSEC        1       /* 1ms */
#define GOYA_CPU_RESET_WAIT_MSEC    100     /* 100ms */
#define GOYA_PLDM_RESET_WAIT_MSEC   1000        /* 1s */
#define GOYA_TEST_QUEUE_WAIT_USEC   100000      /* 100ms */
#define GOYA_PLDM_MMU_TIMEOUT_USEC  (MMU_CONFIG_TIMEOUT_USEC * 100)
#define GOYA_PLDM_QMAN0_TIMEOUT_USEC    (HL_DEVICE_TIMEOUT_USEC * 30)
#define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC  1000000     /* 1s */
#define GOYA_MSG_TO_CPU_TIMEOUT_USEC    4000000     /* 4s */
#define GOYA_WAIT_FOR_BL_TIMEOUT_USEC   15000000    /* 15s */

#define GOYA_QMAN0_FENCE_VAL        0xD169B243

#define GOYA_MAX_STRING_LEN     20

#define GOYA_CB_POOL_CB_CNT     512
#define GOYA_CB_POOL_CB_SIZE        0x20000     /* 128KB */

#define IS_QM_IDLE(engine, qm_glbl_sts0) \
    (((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
#define IS_DMA_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(DMA, qm_glbl_sts0)
#define IS_TPC_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(TPC, qm_glbl_sts0)
#define IS_MME_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(MME, qm_glbl_sts0)

#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
    (((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
            engine##_CMDQ_IDLE_MASK)
#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
    IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
    IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)

#define IS_DMA_IDLE(dma_core_sts0) \
    !((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) \
    (((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)

#define IS_MME_IDLE(mme_arch_sts) \
    (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
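
/*
 * Usage sketch (illustration only, not part of this file's flow): the idle
 * helpers above are meant to be AND-ed per engine, roughly as the
 * device-idle check does it. For one DMA channel, assuming qm_glbl_sts0 and
 * dma_core_sts0 were just read from that engine's status registers:
 *
 *    is_idle &= IS_DMA_QM_IDLE(qm_glbl_sts0) &&
 *            IS_DMA_IDLE(dma_core_sts0);
 */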

static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
        "goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
        "goya cq 4", "goya cpu eq"
};

static u16 goya_packet_sizes[MAX_PACKET_ID] = {
    [PACKET_WREG_32]    = sizeof(struct packet_wreg32),
    [PACKET_WREG_BULK]  = sizeof(struct packet_wreg_bulk),
    [PACKET_MSG_LONG]   = sizeof(struct packet_msg_long),
    [PACKET_MSG_SHORT]  = sizeof(struct packet_msg_short),
    [PACKET_CP_DMA]     = sizeof(struct packet_cp_dma),
    [PACKET_MSG_PROT]   = sizeof(struct packet_msg_prot),
    [PACKET_FENCE]      = sizeof(struct packet_fence),
    [PACKET_LIN_DMA]    = sizeof(struct packet_lin_dma),
    [PACKET_NOP]        = sizeof(struct packet_nop),
    [PACKET_STOP]       = sizeof(struct packet_stop)
};

static inline bool validate_packet_id(enum packet_id id)
{
    switch (id) {
    case PACKET_WREG_32:
    case PACKET_WREG_BULK:
    case PACKET_MSG_LONG:
    case PACKET_MSG_SHORT:
    case PACKET_CP_DMA:
    case PACKET_MSG_PROT:
    case PACKET_FENCE:
    case PACKET_LIN_DMA:
    case PACKET_NOP:
    case PACKET_STOP:
        return true;
    default:
        return false;
    }
}
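
/*
 * Sketch of how goya_packet_sizes[] and validate_packet_id() cooperate
 * during CB parsing (a simplified assumption of the parser loop, not a
 * verbatim copy of it): the packet id is read from the header, validated,
 * and then used to advance the parsing cursor by the packet's size.
 *
 *    pkt_id = (le64_to_cpu(user_pkt->header) &
 *            PACKET_HEADER_PACKET_ID_MASK) >>
 *            PACKET_HEADER_PACKET_ID_SHIFT;
 *    if (!validate_packet_id(pkt_id))
 *        return -EINVAL;
 *    pkt_size = goya_packet_sizes[pkt_id];
 *    cb_parsed_length += pkt_size;
 */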

static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
    mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
    mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
    mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
    mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
    mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
    mmTPC0_QM_GLBL_SECURE_PROPS,
    mmTPC0_QM_GLBL_NON_SECURE_PROPS,
    mmTPC0_CMDQ_GLBL_SECURE_PROPS,
    mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
    mmTPC0_CFG_ARUSER,
    mmTPC0_CFG_AWUSER,
    mmTPC1_QM_GLBL_SECURE_PROPS,
    mmTPC1_QM_GLBL_NON_SECURE_PROPS,
    mmTPC1_CMDQ_GLBL_SECURE_PROPS,
    mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
    mmTPC1_CFG_ARUSER,
    mmTPC1_CFG_AWUSER,
    mmTPC2_QM_GLBL_SECURE_PROPS,
    mmTPC2_QM_GLBL_NON_SECURE_PROPS,
    mmTPC2_CMDQ_GLBL_SECURE_PROPS,
    mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
    mmTPC2_CFG_ARUSER,
    mmTPC2_CFG_AWUSER,
    mmTPC3_QM_GLBL_SECURE_PROPS,
    mmTPC3_QM_GLBL_NON_SECURE_PROPS,
    mmTPC3_CMDQ_GLBL_SECURE_PROPS,
    mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
    mmTPC3_CFG_ARUSER,
    mmTPC3_CFG_AWUSER,
    mmTPC4_QM_GLBL_SECURE_PROPS,
    mmTPC4_QM_GLBL_NON_SECURE_PROPS,
    mmTPC4_CMDQ_GLBL_SECURE_PROPS,
    mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
    mmTPC4_CFG_ARUSER,
    mmTPC4_CFG_AWUSER,
    mmTPC5_QM_GLBL_SECURE_PROPS,
    mmTPC5_QM_GLBL_NON_SECURE_PROPS,
    mmTPC5_CMDQ_GLBL_SECURE_PROPS,
    mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
    mmTPC5_CFG_ARUSER,
    mmTPC5_CFG_AWUSER,
    mmTPC6_QM_GLBL_SECURE_PROPS,
    mmTPC6_QM_GLBL_NON_SECURE_PROPS,
    mmTPC6_CMDQ_GLBL_SECURE_PROPS,
    mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
    mmTPC6_CFG_ARUSER,
    mmTPC6_CFG_AWUSER,
    mmTPC7_QM_GLBL_SECURE_PROPS,
    mmTPC7_QM_GLBL_NON_SECURE_PROPS,
    mmTPC7_CMDQ_GLBL_SECURE_PROPS,
    mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
    mmTPC7_CFG_ARUSER,
    mmTPC7_CFG_AWUSER,
    mmMME_QM_GLBL_SECURE_PROPS,
    mmMME_QM_GLBL_NON_SECURE_PROPS,
    mmMME_CMDQ_GLBL_SECURE_PROPS,
    mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
    mmMME_SBA_CONTROL_DATA,
    mmMME_SBB_CONTROL_DATA,
    mmMME_SBC_CONTROL_DATA,
    mmMME_WBC_CONTROL_DATA,
    mmPCIE_WRAP_PSOC_ARUSER,
    mmPCIE_WRAP_PSOC_AWUSER
};

static u32 goya_all_events[] = {
    GOYA_ASYNC_EVENT_ID_PCIE_IF,
    GOYA_ASYNC_EVENT_ID_TPC0_ECC,
    GOYA_ASYNC_EVENT_ID_TPC1_ECC,
    GOYA_ASYNC_EVENT_ID_TPC2_ECC,
    GOYA_ASYNC_EVENT_ID_TPC3_ECC,
    GOYA_ASYNC_EVENT_ID_TPC4_ECC,
    GOYA_ASYNC_EVENT_ID_TPC5_ECC,
    GOYA_ASYNC_EVENT_ID_TPC6_ECC,
    GOYA_ASYNC_EVENT_ID_TPC7_ECC,
    GOYA_ASYNC_EVENT_ID_MME_ECC,
    GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
    GOYA_ASYNC_EVENT_ID_MMU_ECC,
    GOYA_ASYNC_EVENT_ID_DMA_MACRO,
    GOYA_ASYNC_EVENT_ID_DMA_ECC,
    GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
    GOYA_ASYNC_EVENT_ID_PSOC_MEM,
    GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
    GOYA_ASYNC_EVENT_ID_SRAM0,
    GOYA_ASYNC_EVENT_ID_SRAM1,
    GOYA_ASYNC_EVENT_ID_SRAM2,
    GOYA_ASYNC_EVENT_ID_SRAM3,
    GOYA_ASYNC_EVENT_ID_SRAM4,
    GOYA_ASYNC_EVENT_ID_SRAM5,
    GOYA_ASYNC_EVENT_ID_SRAM6,
    GOYA_ASYNC_EVENT_ID_SRAM7,
    GOYA_ASYNC_EVENT_ID_SRAM8,
    GOYA_ASYNC_EVENT_ID_SRAM9,
    GOYA_ASYNC_EVENT_ID_SRAM10,
    GOYA_ASYNC_EVENT_ID_SRAM11,
    GOYA_ASYNC_EVENT_ID_SRAM12,
    GOYA_ASYNC_EVENT_ID_SRAM13,
    GOYA_ASYNC_EVENT_ID_SRAM14,
    GOYA_ASYNC_EVENT_ID_SRAM15,
    GOYA_ASYNC_EVENT_ID_SRAM16,
    GOYA_ASYNC_EVENT_ID_SRAM17,
    GOYA_ASYNC_EVENT_ID_SRAM18,
    GOYA_ASYNC_EVENT_ID_SRAM19,
    GOYA_ASYNC_EVENT_ID_SRAM20,
    GOYA_ASYNC_EVENT_ID_SRAM21,
    GOYA_ASYNC_EVENT_ID_SRAM22,
    GOYA_ASYNC_EVENT_ID_SRAM23,
    GOYA_ASYNC_EVENT_ID_SRAM24,
    GOYA_ASYNC_EVENT_ID_SRAM25,
    GOYA_ASYNC_EVENT_ID_SRAM26,
    GOYA_ASYNC_EVENT_ID_SRAM27,
    GOYA_ASYNC_EVENT_ID_SRAM28,
    GOYA_ASYNC_EVENT_ID_SRAM29,
    GOYA_ASYNC_EVENT_ID_GIC500,
    GOYA_ASYNC_EVENT_ID_PLL0,
    GOYA_ASYNC_EVENT_ID_PLL1,
    GOYA_ASYNC_EVENT_ID_PLL3,
    GOYA_ASYNC_EVENT_ID_PLL4,
    GOYA_ASYNC_EVENT_ID_PLL5,
    GOYA_ASYNC_EVENT_ID_PLL6,
    GOYA_ASYNC_EVENT_ID_AXI_ECC,
    GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
    GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
    GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
    GOYA_ASYNC_EVENT_ID_PCIE_DEC,
    GOYA_ASYNC_EVENT_ID_TPC0_DEC,
    GOYA_ASYNC_EVENT_ID_TPC1_DEC,
    GOYA_ASYNC_EVENT_ID_TPC2_DEC,
    GOYA_ASYNC_EVENT_ID_TPC3_DEC,
    GOYA_ASYNC_EVENT_ID_TPC4_DEC,
    GOYA_ASYNC_EVENT_ID_TPC5_DEC,
    GOYA_ASYNC_EVENT_ID_TPC6_DEC,
    GOYA_ASYNC_EVENT_ID_TPC7_DEC,
    GOYA_ASYNC_EVENT_ID_MME_WACS,
    GOYA_ASYNC_EVENT_ID_MME_WACSD,
    GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
    GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
    GOYA_ASYNC_EVENT_ID_PSOC,
    GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
    GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
    GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
    GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
    GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
    GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
    GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
    GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
    GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
    GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
    GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
    GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
    GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
    GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
    GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
    GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
    GOYA_ASYNC_EVENT_ID_TPC0_QM,
    GOYA_ASYNC_EVENT_ID_TPC1_QM,
    GOYA_ASYNC_EVENT_ID_TPC2_QM,
    GOYA_ASYNC_EVENT_ID_TPC3_QM,
    GOYA_ASYNC_EVENT_ID_TPC4_QM,
    GOYA_ASYNC_EVENT_ID_TPC5_QM,
    GOYA_ASYNC_EVENT_ID_TPC6_QM,
    GOYA_ASYNC_EVENT_ID_TPC7_QM,
    GOYA_ASYNC_EVENT_ID_MME_QM,
    GOYA_ASYNC_EVENT_ID_MME_CMDQ,
    GOYA_ASYNC_EVENT_ID_DMA0_QM,
    GOYA_ASYNC_EVENT_ID_DMA1_QM,
    GOYA_ASYNC_EVENT_ID_DMA2_QM,
    GOYA_ASYNC_EVENT_ID_DMA3_QM,
    GOYA_ASYNC_EVENT_ID_DMA4_QM,
    GOYA_ASYNC_EVENT_ID_DMA0_CH,
    GOYA_ASYNC_EVENT_ID_DMA1_CH,
    GOYA_ASYNC_EVENT_ID_DMA2_CH,
    GOYA_ASYNC_EVENT_ID_DMA3_CH,
    GOYA_ASYNC_EVENT_ID_DMA4_CH,
    GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
    GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
    GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
    GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
    GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
    GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
    GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
    GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
    GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
    GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
    GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
    GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
    GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
    GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
    GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
    GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
    GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
};

static s64 goya_state_dump_specs_props[SP_MAX] = {0};

static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);

int goya_set_fixed_properties(struct hl_device *hdev)
{
    struct asic_fixed_properties *prop = &hdev->asic_prop;
    int i;

    prop->max_queues = GOYA_QUEUE_ID_SIZE;
    prop->hw_queues_props = kcalloc(prop->max_queues,
            sizeof(struct hw_queue_properties),
            GFP_KERNEL);

    if (!prop->hw_queues_props)
        return -ENOMEM;

    for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
        prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
        prop->hw_queues_props[i].driver_only = 0;
        prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
    }

    for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
        prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
        prop->hw_queues_props[i].driver_only = 1;
        prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
    }

    for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
            NUMBER_OF_INT_HW_QUEUES; i++) {
        prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
        prop->hw_queues_props[i].driver_only = 0;
        prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER;
    }

    prop->cfg_base_address = CFG_BASE;
    prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
    prop->host_base_address = HOST_PHYS_BASE;
    prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
    prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
    prop->completion_mode = HL_COMPLETION_MODE_JOB;
    prop->dram_base_address = DRAM_PHYS_BASE;
    prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
    prop->dram_end_address = prop->dram_base_address + prop->dram_size;
    prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

    prop->sram_base_address = SRAM_BASE_ADDR;
    prop->sram_size = SRAM_SIZE;
    prop->sram_end_address = prop->sram_base_address + prop->sram_size;
    prop->sram_user_base_address = prop->sram_base_address +
                        SRAM_USER_BASE_OFFSET;

    prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
    prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR;
    if (hdev->pldm)
        prop->mmu_pgt_size = 0x800000; /* 8MB */
    else
        prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
    prop->mmu_pte_size = HL_PTE_SIZE;
    prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
    prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
    prop->dram_page_size = PAGE_SIZE_2MB;
    prop->device_mem_alloc_default_page_size = prop->dram_page_size;
    prop->dram_supports_virtual_memory = true;

    prop->dmmu.hop_shifts[MMU_HOP0] = MMU_V1_0_HOP0_SHIFT;
    prop->dmmu.hop_shifts[MMU_HOP1] = MMU_V1_0_HOP1_SHIFT;
    prop->dmmu.hop_shifts[MMU_HOP2] = MMU_V1_0_HOP2_SHIFT;
    prop->dmmu.hop_shifts[MMU_HOP3] = MMU_V1_0_HOP3_SHIFT;
    prop->dmmu.hop_shifts[MMU_HOP4] = MMU_V1_0_HOP4_SHIFT;
    prop->dmmu.hop_masks[MMU_HOP0] = MMU_V1_0_HOP0_MASK;
    prop->dmmu.hop_masks[MMU_HOP1] = MMU_V1_0_HOP1_MASK;
    prop->dmmu.hop_masks[MMU_HOP2] = MMU_V1_0_HOP2_MASK;
    prop->dmmu.hop_masks[MMU_HOP3] = MMU_V1_0_HOP3_MASK;
    prop->dmmu.hop_masks[MMU_HOP4] = MMU_V1_0_HOP4_MASK;
    prop->dmmu.start_addr = VA_DDR_SPACE_START;
    prop->dmmu.end_addr = VA_DDR_SPACE_END;
    prop->dmmu.page_size = PAGE_SIZE_2MB;
    prop->dmmu.num_hops = MMU_ARCH_5_HOPS;
    prop->dmmu.last_mask = LAST_MASK;
    /* TODO: will be duplicated until implementing per-MMU props */
    prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
    prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;

    /* shifts and masks are the same in PMMU and DMMU */
    memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
    prop->pmmu.start_addr = VA_HOST_SPACE_START;
    prop->pmmu.end_addr = VA_HOST_SPACE_END;
    prop->pmmu.page_size = PAGE_SIZE_4KB;
    prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
    prop->pmmu.last_mask = LAST_MASK;
    /* TODO: will be duplicated until implementing per-MMU props */
    prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
    prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;

    /* PMMU and HPMMU are the same except for page size */
    memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
    prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

    prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
    prop->cfg_size = CFG_SIZE;
    prop->max_asid = MAX_ASID;
    prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
    prop->high_pll = PLL_HIGH_DEFAULT;
    prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
    prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
    prop->max_power_default = MAX_POWER_DEFAULT;
    prop->dc_power_default = DC_POWER_DEFAULT;
    prop->tpc_enabled_mask = TPC_ENABLED_MASK;
    prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
    prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

    strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
        CARD_NAME_MAX_LEN);

    prop->max_pending_cs = GOYA_MAX_PENDING_CS;

    prop->first_available_user_interrupt = USHRT_MAX;

    for (i = 0 ; i < HL_MAX_DCORES ; i++)
        prop->first_available_cq[i] = USHRT_MAX;

    prop->fw_cpu_boot_dev_sts0_valid = false;
    prop->fw_cpu_boot_dev_sts1_valid = false;
    prop->hard_reset_done_by_fw = false;
    prop->gic_interrupts_enable = true;

    prop->server_type = HL_SERVER_TYPE_UNKNOWN;

    prop->clk_pll_index = HL_GOYA_MME_PLL;

    prop->use_get_power_for_reset_history = true;

    prop->configurable_stop_on_err = true;

    prop->set_max_power_on_device_init = true;

    prop->dma_mask = 48;

    return 0;
}

/*
 * goya_pci_bars_map - Map PCI BARS of Goya device
 *
 * @hdev: pointer to hl_device structure
 *
 * Request PCI regions and map them to kernel virtual addresses.
 * Returns 0 on success
 *
 */
static int goya_pci_bars_map(struct hl_device *hdev)
{
    static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"};
    bool is_wc[3] = {false, false, true};
    int rc;

    rc = hl_pci_bars_map(hdev, name, is_wc);
    if (rc)
        return rc;

    hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
            (CFG_BASE - SRAM_BASE_ADDR);

    return 0;
}

static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
{
    struct goya_device *goya = hdev->asic_specific;
    struct hl_inbound_pci_region pci_region;
    u64 old_addr = addr;
    int rc;

    if ((goya) && (goya->ddr_bar_cur_addr == addr))
        return old_addr;

    /* Inbound Region 1 - Bar 4 - Point to DDR */
    pci_region.mode = PCI_BAR_MATCH_MODE;
    pci_region.bar = DDR_BAR_ID;
    pci_region.addr = addr;
    rc = hl_pci_set_inbound_region(hdev, 1, &pci_region);
    if (rc)
        return U64_MAX;

    if (goya) {
        old_addr = goya->ddr_bar_cur_addr;
        goya->ddr_bar_cur_addr = addr;
    }

    return old_addr;
}
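
/*
 * Usage note (sketch, under the assumption that the caller holds whatever
 * lock serializes DDR BAR moves): callers typically save the returned old
 * base and restore it when done, e.g. when accessing an arbitrary DRAM
 * address through the DDR BAR:
 *
 *    old_base = goya_set_ddr_bar_base(hdev, bar_aligned_dram_addr);
 *    if (old_base == U64_MAX)
 *        return -EIO;
 *    ...access the address through hdev->pcie_bar[DDR_BAR_ID]...
 *    goya_set_ddr_bar_base(hdev, old_base);
 */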

/*
 * goya_init_iatu - Initialize the iATU unit inside the PCI controller
 *
 * @hdev: pointer to hl_device structure
 *
 * This is needed in case the firmware doesn't initialize the iATU
 *
 */
static int goya_init_iatu(struct hl_device *hdev)
{
    struct hl_inbound_pci_region inbound_region;
    struct hl_outbound_pci_region outbound_region;
    int rc;

    if (hdev->asic_prop.iatu_done_by_fw)
        return 0;

    /* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
    inbound_region.mode = PCI_BAR_MATCH_MODE;
    inbound_region.bar = SRAM_CFG_BAR_ID;
    inbound_region.addr = SRAM_BASE_ADDR;
    rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
    if (rc)
        goto done;

    /* Inbound Region 1 - Bar 4 - Point to DDR */
    inbound_region.mode = PCI_BAR_MATCH_MODE;
    inbound_region.bar = DDR_BAR_ID;
    inbound_region.addr = DRAM_PHYS_BASE;
    rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
    if (rc)
        goto done;

    /* Outbound Region 0 - Point to Host */
    outbound_region.addr = HOST_PHYS_BASE;
    outbound_region.size = HOST_PHYS_SIZE;
    rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
    return rc;
}

static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
{
    return RREG32(mmHW_STATE);
}

/*
 * goya_early_init - GOYA early initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Verify PCI bars
 * Set DMA masks
 * PCI controller initialization
 * Map PCI bars
 *
 */
static int goya_early_init(struct hl_device *hdev)
{
    struct asic_fixed_properties *prop = &hdev->asic_prop;
    struct pci_dev *pdev = hdev->pdev;
    resource_size_t pci_bar_size;
    u32 fw_boot_status, val;
    int rc;

    rc = goya_set_fixed_properties(hdev);
    if (rc) {
        dev_err(hdev->dev, "Failed to get fixed properties\n");
        return rc;
    }

    /* Check BAR sizes */
    pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);

    if (pci_bar_size != CFG_BAR_SIZE) {
        dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
            SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
        rc = -ENODEV;
        goto free_queue_props;
    }

    pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);

    if (pci_bar_size != MSIX_BAR_SIZE) {
        dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
            MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
        rc = -ENODEV;
        goto free_queue_props;
    }

    prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
    hdev->dram_pci_bar_start = pci_resource_start(pdev, DDR_BAR_ID);

    /* If FW security is enabled at this point it means no access to ELBI */
    if (hdev->asic_prop.fw_security_enabled) {
        hdev->asic_prop.iatu_done_by_fw = true;
        goto pci_init;
    }

    rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
                &fw_boot_status);
    if (rc)
        goto free_queue_props;

    /* Check whether FW is configuring iATU */
    if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
            (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
        hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
    rc = hl_pci_init(hdev);
    if (rc)
        goto free_queue_props;

    /* Before continuing in the initialization, we need to read the preboot
     * version to determine whether we run with a security-enabled firmware
     */
    rc = hl_fw_read_preboot_status(hdev);
    if (rc) {
        if (hdev->reset_on_preboot_fail)
            hdev->asic_funcs->hw_fini(hdev, true, false);
        goto pci_fini;
    }

    if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
        dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
        hdev->asic_funcs->hw_fini(hdev, true, false);
    }

    if (!hdev->pldm) {
        val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
        if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
            dev_warn(hdev->dev,
                "PCI strap is not configured correctly, PCI bus errors may occur\n");
    }

    return 0;

pci_fini:
    hl_pci_fini(hdev);
free_queue_props:
    kfree(hdev->asic_prop.hw_queues_props);
    return rc;
}

/*
 * goya_early_fini - GOYA early finalization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Unmap PCI bars
 *
 */
static int goya_early_fini(struct hl_device *hdev)
{
    kfree(hdev->asic_prop.hw_queues_props);
    hl_pci_fini(hdev);

    return 0;
}

static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
    /* mask to zero the MMBP and ASID bits */
    WREG32_AND(reg, ~0x7FF);
    WREG32_OR(reg, asid);
}
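
/*
 * Worked example (based only on the mask above): bits [10:0] carry the
 * MMBP and ASID fields, so for asid == 3 the two writes leave the upper
 * bits untouched and the register ends up as (reg & ~0x7FF) | 0x3, i.e.
 * MMU bypass cleared and ASID set to 3.
 */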

static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
{
    struct goya_device *goya = hdev->asic_specific;

    if (!(goya->hw_cap_initialized & HW_CAP_MMU))
        return;

    if (secure)
        WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
    else
        WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);

    RREG32(mmDMA_QM_0_GLBL_PROT);
}
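
/*
 * Usage sketch (a simplified rendering of the QMAN0 job-submission path,
 * not a verbatim copy of it): the driver brackets privileged QMAN0 work
 * with this helper, raising the security level for the duration of the job
 * and dropping it afterwards:
 *
 *    goya_qman0_set_security(hdev, true);
 *    rc = ...submit the job on QMAN0 and wait for completion...;
 *    goya_qman0_set_security(hdev, false);
 */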

/*
 * goya_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_fetch_psoc_frequency(struct hl_device *hdev)
{
    struct asic_fixed_properties *prop = &hdev->asic_prop;
    u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
    u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
    int rc;

    if (hdev->asic_prop.fw_security_enabled) {
        struct goya_device *goya = hdev->asic_specific;

        if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
            return;

        rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL,
                pll_freq_arr);

        if (rc)
            return;

        freq = pll_freq_arr[1];
    } else {
        div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
        div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
        nr = RREG32(mmPSOC_PCI_PLL_NR);
        nf = RREG32(mmPSOC_PCI_PLL_NF);
        od = RREG32(mmPSOC_PCI_PLL_OD);

        if (div_sel == DIV_SEL_REF_CLK ||
                div_sel == DIV_SEL_DIVIDED_REF) {
            if (div_sel == DIV_SEL_REF_CLK)
                freq = PLL_REF_CLK;
            else
                freq = PLL_REF_CLK / (div_fctr + 1);
        } else if (div_sel == DIV_SEL_PLL_CLK ||
                div_sel == DIV_SEL_DIVIDED_PLL) {
            pll_clk = PLL_REF_CLK * (nf + 1) /
                    ((nr + 1) * (od + 1));
            if (div_sel == DIV_SEL_PLL_CLK)
                freq = pll_clk;
            else
                freq = pll_clk / (div_fctr + 1);
        } else {
            dev_warn(hdev->dev,
                "Received invalid div select value: %d",
                div_sel);
            freq = 0;
        }
    }

    prop->psoc_timestamp_frequency = freq;
    prop->psoc_pci_pll_nr = nr;
    prop->psoc_pci_pll_nf = nf;
    prop->psoc_pci_pll_od = od;
    prop->psoc_pci_pll_div_factor = div_fctr;
}
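
/*
 * Worked example of the PLL math above (the 50 MHz reference is an
 * assumption for illustration only; the real value comes from the
 * PLL_REF_CLK definition elsewhere): with nr = 0, nf = 31, od = 1 and
 * div_sel == DIV_SEL_PLL_CLK,
 *
 *    pll_clk = 50 * (31 + 1) / ((0 + 1) * (1 + 1)) = 800 MHz
 *
 * and with div_sel == DIV_SEL_DIVIDED_PLL and div_fctr = 1 the result is
 * halved to 400 MHz.
 */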

/*
 * goya_set_frequency - set the frequency of the device
 *
 * @hdev: pointer to habanalabs device structure
 * @freq: the new frequency value
 *
 * Change the frequency if needed. This function has no protection against
 * concurrency, so the caller must ensure it is not invoked from multiple
 * threads with different values.
 *
 * Returns 0 if no change was done, otherwise returns 1
 */
int goya_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
{
    struct goya_device *goya = hdev->asic_specific;

    if ((goya->pm_mng_profile == PM_MANUAL) ||
            (goya->curr_pll_profile == freq))
        return 0;

    dev_dbg(hdev->dev, "Changing device frequency to %s\n",
        freq == PLL_HIGH ? "high" : "low");

    goya_set_pll_profile(hdev, freq);

    goya->curr_pll_profile = freq;

    return 1;
}

static void goya_set_freq_to_low_job(struct work_struct *work)
{
    struct goya_work_freq *goya_work = container_of(work,
                        struct goya_work_freq,
                        work_freq.work);
    struct hl_device *hdev = goya_work->hdev;

    mutex_lock(&hdev->fpriv_list_lock);

    if (!hdev->is_compute_ctx_active)
        goya_set_frequency(hdev, PLL_LOW);

    mutex_unlock(&hdev->fpriv_list_lock);

    schedule_delayed_work(&goya_work->work_freq,
            usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
}

int goya_late_init(struct hl_device *hdev)
{
    struct asic_fixed_properties *prop = &hdev->asic_prop;
    struct goya_device *goya = hdev->asic_specific;
    int rc;

    goya_fetch_psoc_frequency(hdev);

    rc = goya_mmu_clear_pgt_range(hdev);
    if (rc) {
        dev_err(hdev->dev,
            "Failed to clear MMU page tables range %d\n", rc);
        return rc;
    }

    rc = goya_mmu_set_dram_default_page(hdev);
    if (rc) {
        dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
        return rc;
    }

    rc = goya_mmu_add_mappings_for_device_cpu(hdev);
    if (rc)
        return rc;

    rc = goya_init_cpu_queues(hdev);
    if (rc)
        return rc;

    rc = goya_test_cpu_queue(hdev);
    if (rc)
        return rc;

    rc = goya_cpucp_info_get(hdev);
    if (rc) {
        dev_err(hdev->dev, "Failed to get cpucp info %d\n", rc);
        return rc;
    }

    /* Now that we have the DRAM size in ASIC prop, we need to check
     * its size and configure the DMA_IF DDR wrap protection (which is in
     * the MMU block) accordingly. The value is the log2 of the DRAM size
     */
    WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));

    rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
    if (rc) {
        dev_err(hdev->dev,
            "Failed to enable PCI access from CPU %d\n", rc);
        return rc;
    }

    /* force setting to low frequency */
    goya->curr_pll_profile = PLL_LOW;

    goya->pm_mng_profile = PM_AUTO;

    goya_set_pll_profile(hdev, PLL_LOW);

    schedule_delayed_work(&goya->goya_work->work_freq,
        usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));

    return 0;
}

/*
 * goya_late_fini - GOYA late tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 * Free the allocated sensor structures
 */
void goya_late_fini(struct hl_device *hdev)
{
    const struct hwmon_channel_info **channel_info_arr;
    struct goya_device *goya = hdev->asic_specific;
    int i = 0;

    cancel_delayed_work_sync(&goya->goya_work->work_freq);

    if (!hdev->hl_chip_info->info)
        return;

    channel_info_arr = hdev->hl_chip_info->info;

    while (channel_info_arr[i]) {
        kfree(channel_info_arr[i]->config);
        kfree(channel_info_arr[i]);
        i++;
    }

    kfree(channel_info_arr);

    hdev->hl_chip_info->info = NULL;
}

static void goya_set_pci_memory_regions(struct hl_device *hdev)
{
    struct asic_fixed_properties *prop = &hdev->asic_prop;
    struct pci_mem_region *region;

    /* CFG */
    region = &hdev->pci_mem_region[PCI_REGION_CFG];
    region->region_base = CFG_BASE;
    region->region_size = CFG_SIZE;
    region->offset_in_bar = CFG_BASE - SRAM_BASE_ADDR;
    region->bar_size = CFG_BAR_SIZE;
    region->bar_id = SRAM_CFG_BAR_ID;
    region->used = 1;

    /* SRAM */
    region = &hdev->pci_mem_region[PCI_REGION_SRAM];
    region->region_base = SRAM_BASE_ADDR;
    region->region_size = SRAM_SIZE;
    region->offset_in_bar = 0;
    region->bar_size = CFG_BAR_SIZE;
    region->bar_id = SRAM_CFG_BAR_ID;
    region->used = 1;

    /* DRAM */
    region = &hdev->pci_mem_region[PCI_REGION_DRAM];
    region->region_base = DRAM_PHYS_BASE;
    region->region_size = hdev->asic_prop.dram_size;
    region->offset_in_bar = 0;
    region->bar_size = prop->dram_pci_bar_size;
    region->bar_id = DDR_BAR_ID;
    region->used = 1;
}

/*
 * goya_sw_init - Goya software initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_init(struct hl_device *hdev)
{
    struct goya_device *goya;
    int rc;

    /* Allocate device structure */
    goya = kzalloc(sizeof(*goya), GFP_KERNEL);
    if (!goya)
        return -ENOMEM;

    /* according to goya_init_iatu */
    goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;

    goya->mme_clk = GOYA_PLL_FREQ_LOW;
    goya->tpc_clk = GOYA_PLL_FREQ_LOW;
    goya->ic_clk = GOYA_PLL_FREQ_LOW;

    hdev->asic_specific = goya;

    /* Create DMA pool for small allocations */
    hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
            &hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
    if (!hdev->dma_pool) {
        dev_err(hdev->dev, "failed to create DMA pool\n");
        rc = -ENOMEM;
        goto free_goya_device;
    }

    hdev->cpu_accessible_dma_mem = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
                            &hdev->cpu_accessible_dma_address,
                            GFP_KERNEL | __GFP_ZERO);

    if (!hdev->cpu_accessible_dma_mem) {
        rc = -ENOMEM;
        goto free_dma_pool;
    }

    dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
        &hdev->cpu_accessible_dma_address);

    hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
    if (!hdev->cpu_accessible_dma_pool) {
        dev_err(hdev->dev,
            "Failed to create CPU accessible DMA pool\n");
        rc = -ENOMEM;
        goto free_cpu_dma_mem;
    }

    rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
                (uintptr_t) hdev->cpu_accessible_dma_mem,
                HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
    if (rc) {
        dev_err(hdev->dev,
            "Failed to add memory to CPU accessible DMA pool\n");
        rc = -EFAULT;
        goto free_cpu_accessible_dma_pool;
    }

    spin_lock_init(&goya->hw_queues_lock);
    hdev->supports_coresight = true;
    hdev->asic_prop.supports_compute_reset = true;
    hdev->asic_prop.allow_inference_soft_reset = true;
    hdev->supports_wait_for_multi_cs = false;

    hdev->asic_funcs->set_pci_memory_regions(hdev);

    goya->goya_work = kmalloc(sizeof(struct goya_work_freq), GFP_KERNEL);
    if (!goya->goya_work) {
        rc = -ENOMEM;
        goto free_cpu_accessible_dma_pool;
    }

    goya->goya_work->hdev = hdev;
    INIT_DELAYED_WORK(&goya->goya_work->work_freq, goya_set_freq_to_low_job);

    return 0;

free_cpu_accessible_dma_pool:
    gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
    hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
                    hdev->cpu_accessible_dma_address);
free_dma_pool:
    dma_pool_destroy(hdev->dma_pool);
free_goya_device:
    kfree(goya);

    return rc;
}

/*
 * goya_sw_fini - Goya software tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_fini(struct hl_device *hdev)
{
    struct goya_device *goya = hdev->asic_specific;

    gen_pool_destroy(hdev->cpu_accessible_dma_pool);

    hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
                    hdev->cpu_accessible_dma_address);

    dma_pool_destroy(hdev->dma_pool);

    kfree(goya->goya_work);
    kfree(goya);

    return 0;
}

static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
        dma_addr_t bus_address)
{
    struct goya_device *goya = hdev->asic_specific;
    u32 mtr_base_lo, mtr_base_hi;
    u32 so_base_lo, so_base_hi;
    u32 gic_base_lo, gic_base_hi;
    u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);
    u32 dma_err_cfg = QMAN_DMA_ERR_MSG_EN;

    mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
    mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
    so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
    so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

    gic_base_lo =
        lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
    gic_base_hi =
        upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

    WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
    WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));

    WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
    WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
    WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);

    WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
    WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
    WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
    WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
    WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
    WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
    WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
            GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);

    /* PQ has buffer of 2 cache lines, while CQ has 8 lines */
    WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
    WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);

    if (goya->hw_cap_initialized & HW_CAP_MMU)
        WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
    else
        WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);

    if (hdev->stop_on_err)
        dma_err_cfg |= 1 << DMA_QM_0_GLBL_ERR_CFG_DMA_STOP_ON_ERR_SHIFT;

    WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, dma_err_cfg);
    WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
}

static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
{
    u32 gic_base_lo, gic_base_hi;
    u64 sob_addr;
    u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1);

    gic_base_lo =
        lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
    gic_base_hi =
        upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

    WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo);
    WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi);
    WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off,
            GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id);

    if (dma_id)
        sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
                (dma_id - 1) * 4;
    else
        sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;

    WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr));
    WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
}

/*
 * goya_init_dma_qmans - Initialize QMAN DMA registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the QMAN DMA channels
 *
 */
void goya_init_dma_qmans(struct hl_device *hdev)
{
    struct goya_device *goya = hdev->asic_specific;
    struct hl_hw_queue *q;
    int i;

    if (goya->hw_cap_initialized & HW_CAP_DMA)
        return;

    q = &hdev->kernel_queues[0];

    for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
        q->cq_id = q->msi_vec = i;
        goya_init_dma_qman(hdev, i, q->bus_address);
        goya_init_dma_ch(hdev, i);
    }

    goya->hw_cap_initialized |= HW_CAP_DMA;
}

/*
 * goya_disable_external_queues - Disable external queues
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_disable_external_queues(struct hl_device *hdev)
{
    struct goya_device *goya = hdev->asic_specific;

    if (!(goya->hw_cap_initialized & HW_CAP_DMA))
        return;

    WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
    WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
    WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
    WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
    WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
}
static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
                u32 cp_sts_reg, u32 glbl_sts0_reg)
{
    int rc;
    u32 status;

    /* use the values of TPC0 as they are all the same */

    WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);

    status = RREG32(cp_sts_reg);
    if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
        rc = hl_poll_timeout(
            hdev,
            cp_sts_reg,
            status,
            !(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
            1000,
            QMAN_FENCE_TIMEOUT_USEC);

        /* if QMAN is stuck in fence, there is no need to check for stop */
        if (rc)
            return 0;
    }

    rc = hl_poll_timeout(
        hdev,
        glbl_sts0_reg,
        status,
        (status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
        1000,
        QMAN_STOP_TIMEOUT_USEC);

    if (rc) {
        dev_err(hdev->dev,
            "Timeout while waiting for QMAN to stop\n");
        return -EINVAL;
    }

    return 0;
}

/*
 * goya_stop_external_queues - Stop external queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_stop_external_queues(struct hl_device *hdev)
{
    int rc, retval = 0;

    struct goya_device *goya = hdev->asic_specific;

    if (!(goya->hw_cap_initialized & HW_CAP_DMA))
        return retval;

    rc = goya_stop_queue(hdev,
            mmDMA_QM_0_GLBL_CFG1,
            mmDMA_QM_0_CP_STS,
            mmDMA_QM_0_GLBL_STS0);

    if (rc) {
        dev_err(hdev->dev, "failed to stop DMA QMAN 0\n");
        retval = -EIO;
    }

    rc = goya_stop_queue(hdev,
            mmDMA_QM_1_GLBL_CFG1,
            mmDMA_QM_1_CP_STS,
            mmDMA_QM_1_GLBL_STS0);

    if (rc) {
        dev_err(hdev->dev, "failed to stop DMA QMAN 1\n");
        retval = -EIO;
    }

    rc = goya_stop_queue(hdev,
            mmDMA_QM_2_GLBL_CFG1,
            mmDMA_QM_2_CP_STS,
            mmDMA_QM_2_GLBL_STS0);

    if (rc) {
        dev_err(hdev->dev, "failed to stop DMA QMAN 2\n");
        retval = -EIO;
    }

    rc = goya_stop_queue(hdev,
            mmDMA_QM_3_GLBL_CFG1,
            mmDMA_QM_3_CP_STS,
            mmDMA_QM_3_GLBL_STS0);

    if (rc) {
        dev_err(hdev->dev, "failed to stop DMA QMAN 3\n");
        retval = -EIO;
    }

    rc = goya_stop_queue(hdev,
            mmDMA_QM_4_GLBL_CFG1,
            mmDMA_QM_4_CP_STS,
            mmDMA_QM_4_GLBL_STS0);

    if (rc) {
        dev_err(hdev->dev, "failed to stop DMA QMAN 4\n");
        retval = -EIO;
    }

    return retval;
}

/*
 * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
int goya_init_cpu_queues(struct hl_device *hdev)
{
    struct goya_device *goya = hdev->asic_specific;
    struct asic_fixed_properties *prop = &hdev->asic_prop;
    struct hl_eq *eq;
    u32 status;
    struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
    int err;

    if (!hdev->cpu_queues_enable)
        return 0;

    if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
        return 0;

    eq = &hdev->event_queue;

    WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
    WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

    WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
    WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

    WREG32(mmCPU_CQ_BASE_ADDR_LOW,
            lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
    WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
            upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));

    WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
    WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
    WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

    /* Used for EQ CI */
    WREG32(mmCPU_EQ_CI, 0);

    WREG32(mmCPU_IF_PF_PQ_PI, 0);

    WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);

    WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
            GOYA_ASYNC_EVENT_ID_PI_UPDATE);

    err = hl_poll_timeout(
        hdev,
        mmCPU_PQ_INIT_STATUS,
        status,
        (status == PQ_INIT_STATUS_READY_FOR_HOST),
        1000,
        GOYA_CPU_TIMEOUT_USEC);

    if (err) {
        dev_err(hdev->dev,
            "Failed to setup communication with device CPU\n");
        return -EIO;
    }

    /* update FW application security bits */
    if (prop->fw_cpu_boot_dev_sts0_valid)
        prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);

    if (prop->fw_cpu_boot_dev_sts1_valid)
        prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);

    goya->hw_cap_initialized |= HW_CAP_CPU_Q;
    return 0;
}

static void goya_set_pll_refclk(struct hl_device *hdev)
{
    WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
    WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
    WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
    WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);

    WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
    WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
    WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
    WREG32(mmIC_PLL_DIV_SEL_3, 0x0);

    WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
    WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
    WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
    WREG32(mmMC_PLL_DIV_SEL_3, 0x0);

    WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
    WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
    WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
    WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);

    WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
    WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
    WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
    WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);

    WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
    WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
    WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
    WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);

    WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
    WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
    WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
    WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
}

static void goya_disable_clk_rlx(struct hl_device *hdev)
{
    WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
    WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
}
static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
{
    u64 tpc_eml_address;
    u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
    int err, slm_index;

    tpc_offset = tpc_id * 0x40000;
    tpc_eml_offset = tpc_id * 0x200000;
    tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
    tpc_slm_offset = tpc_eml_address + 0x100000;

    /*
     * Workaround for Bug H2 #2443:
     * "TPC SB is not initialized on chip reset"
     */

    val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
    if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
        dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
            tpc_id);

    WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);

    WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
    WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
    WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
    WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
    WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
    WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
    WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
    WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
    WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
    WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);

    WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
        1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);

    err = hl_poll_timeout(
        hdev,
        mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
        val,
        (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
        1000,
        HL_DEVICE_TIMEOUT_USEC);

    if (err)
        dev_err(hdev->dev,
            "Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);

    WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
        1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);

    msleep(GOYA_RESET_WAIT_MSEC);

    WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
        ~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));

    msleep(GOYA_RESET_WAIT_MSEC);

    for (slm_index = 0 ; slm_index < 256 ; slm_index++)
        WREG32(tpc_slm_offset + (slm_index << 2), 0);

    val = RREG32(tpc_slm_offset);
}

static void goya_tpc_mbist_workaround(struct hl_device *hdev)
{
    struct goya_device *goya = hdev->asic_specific;
    int i;

    if (hdev->pldm)
        return;

    if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST)
        return;

    /* Workaround for H2 #2443 */

    for (i = 0 ; i < TPC_MAX_NUM ; i++)
        _goya_tpc_mbist_workaround(hdev, i);

    goya->hw_cap_initialized |= HW_CAP_TPC_MBIST;
}

/*
 * goya_init_golden_registers - Initialize golden registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the device
 *
 */
static void goya_init_golden_registers(struct hl_device *hdev)
{
    struct goya_device *goya = hdev->asic_specific;
    u32 polynom[10], tpc_intr_mask, offset;
    int i;

    if (goya->hw_cap_initialized & HW_CAP_GOLDEN)
        return;

    polynom[0] = 0x00020080;
    polynom[1] = 0x00401000;
    polynom[2] = 0x00200800;
    polynom[3] = 0x00002000;
    polynom[4] = 0x00080200;
    polynom[5] = 0x00040100;
    polynom[6] = 0x00100400;
    polynom[7] = 0x00004000;
    polynom[8] = 0x00010000;
    polynom[9] = 0x00008000;

    /* Mask all arithmetic interrupts from TPC */
    tpc_intr_mask = 0x7FFF;

    for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) {
        WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
        WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
        WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
        WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
        WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);

        WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204);
        WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204);
        WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204);
        WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204);
        WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204);


        WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206);
        WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206);
        WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206);
        WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207);
        WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207);

        WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207);
        WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207);
        WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206);
        WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206);
        WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206);

        WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101);
        WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102);
        WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103);
        WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104);
        WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105);

        WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105);
        WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104);
        WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103);
        WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102);
        WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101);
    }

    WREG32(mmMME_STORE_MAX_CREDIT, 0x21);
    WREG32(mmMME_AGU, 0x0f0f0f10);
    WREG32(mmMME_SEI_MASK, ~0x0);

    WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
    WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
    WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
    WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
    WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
    WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701);
    WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401);
    WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401);
    WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301);
    WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
    WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
    WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105);
    WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
    WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
    WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301);
    WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401);
    WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101);
    WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101);
    WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202);
    WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101);
    WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201);
    WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701);
    WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101);
    WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
    WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
    WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
    WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701);
    WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201);
    WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
    WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102);
    WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
    WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
    WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707);
    WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201);
    WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
    WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
    WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102);
    WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102);
    WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102);
    WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102);
    WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102);
    WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107);
    WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106);
    WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102);
    WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102);
    WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102);
    WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102);
    WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102);
    WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702);
1657     WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702);
1658     WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602);
1659     WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402);
1660     WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202);
1661     WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102);
1662     WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1663     WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1664     WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1665     WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1666     WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1667     WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1668     WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
1669     WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
1670     WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
1671     WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
1672     WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1673     WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107);
1674     WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107);
1675     WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1676     WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
1677     WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
1678     WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
1679     WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
1680     WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501);
1681     WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501);
1682     WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301);
1683     WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401);
1684     WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101);
1685     WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101);
1686     WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1687     WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1688     WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1689     WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1690     WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1691     WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1692 
1693     WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1694     WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
1695     WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101);
1696     WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102);
1697     WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1698     WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202);
1699     WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201);
1700     WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201);
1701     WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202);
1702     WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1703     WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
1704     WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101);
1705 
1706     WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
1707     WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101);
1708     WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201);
1709     WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102);
1710     WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101);
1711     WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202);
1712     WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201);
1713     WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201);
1714     WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202);
1715     WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1716     WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
1717     WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101);
1718 
1719     WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
1720     WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
1721     WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301);
1722     WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102);
1723     WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101);
1724     WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301);
1725     WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201);
1726     WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201);
1727     WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402);
1728     WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
1729     WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
1730     WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401);
1731 
1732     WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
1733     WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
1734     WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401);
1735     WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
1736     WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101);
1737     WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702);
1738     WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201);
1739     WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201);
1740     WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602);
1741     WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
1742     WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
1743     WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301);
1744 
1745     WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101);
1746     WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101);
1747     WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501);
1748     WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
1749     WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101);
1750     WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602);
1751     WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201);
1752     WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201);
1753     WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702);
1754     WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
1755     WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1756     WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1757 
1758     WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1759     WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
1760     WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601);
1761     WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
1762     WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1763     WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702);
1764     WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101);
1765     WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101);
1766     WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702);
1767     WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
1768     WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1769     WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1770 
1771     for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) {
1772         WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1773         WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1774         WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1775         WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1776         WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1777         WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1778 
1779         WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1780         WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1781         WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1782         WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1783         WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1784         WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1785         WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1786         WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1787 
1788         WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1789         WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1790     }
1791 
1792     for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) {
1793         WREG32(mmMME1_RTR_SCRAMB_EN + offset,
1794                 1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT);
1795         WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset,
1796                 1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT);
1797     }
1798 
1799     for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) {
1800         /*
1801          * Workaround for Bug H2 #2441 :
1802          * "ST.NOP set trace event illegal opcode"
1803          */
1804         WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask);
1805 
1806         WREG32(mmTPC0_NRTR_SCRAMB_EN + offset,
1807                 1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
1808         WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
1809                 1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1810 
1811         WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset,
1812                 ICACHE_FETCH_LINE_NUM, 2);
1813     }
1814 
1815     WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
1816     WREG32(mmDMA_NRTR_NON_LIN_SCRAMB,
1817             1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1818 
1819     WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT);
1820     WREG32(mmPCI_NRTR_NON_LIN_SCRAMB,
1821             1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1822 
1823     /*
1824      * Workaround for H2 #HW-23 bug
1825      * Set DMA max outstanding read requests to 240 on DMA CH 1.
1826      * This limitation is still large enough to not affect Gen4 bandwidth.
1827      * We need to only limit that DMA channel because the user can only read
1828      * from Host using DMA CH 1
1829      */
1830     WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);
1831 
1832     WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
1833 
1834     goya->hw_cap_initialized |= HW_CAP_GOLDEN;
1835 }
1836 
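/*
 * The long WREG32 runs in goya_init_golden_registers() could equally be
 * expressed as a table-driven loop. A minimal sketch of that alternative
 * (the struct and helper are illustrative, not part of the driver; the
 * register/value pairs are copied from the TPC6 block above):
 */
struct goya_reg_val {
    u32 reg;
    u32 val;
};

static const struct goya_reg_val goya_tpc6_rd_rq_arb[] = {
    { mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101 },
    { mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101 },
    { mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601 },
};

static void goya_write_reg_val_table(struct hl_device *hdev,
                const struct goya_reg_val *table, size_t num)
{
    size_t i;

    for (i = 0 ; i < num ; i++)
        WREG32(table[i].reg, table[i].val);
}

/* usage: goya_write_reg_val_table(hdev, goya_tpc6_rd_rq_arb, ARRAY_SIZE(goya_tpc6_rd_rq_arb)); */
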
1837 static void goya_init_mme_qman(struct hl_device *hdev)
1838 {
1839     u32 mtr_base_lo, mtr_base_hi;
1840     u32 so_base_lo, so_base_hi;
1841     u32 gic_base_lo, gic_base_hi;
1842     u64 qman_base_addr;
1843 
1844     mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1845     mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1846     so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1847     so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1848 
1849     gic_base_lo =
1850         lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1851     gic_base_hi =
1852         upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1853 
1854     qman_base_addr = hdev->asic_prop.sram_base_address +
1855                 MME_QMAN_BASE_OFFSET;
1856 
1857     WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
1858     WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
1859     WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
1860     WREG32(mmMME_QM_PQ_PI, 0);
1861     WREG32(mmMME_QM_PQ_CI, 0);
1862     WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
1863     WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
1864     WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
1865     WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);
1866 
1867     WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1868     WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1869     WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1870     WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1871 
1872     /* QMAN CQ has 8 cache lines */
1873     WREG32(mmMME_QM_CQ_CFG1, 0x00080008);
1874 
1875     WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
1876     WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);
1877 
1878     WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);
1879 
1880     WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);
1881 
1882     WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);
1883 
1884     WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
1885 }
1886 
1887 static void goya_init_mme_cmdq(struct hl_device *hdev)
1888 {
1889     u32 mtr_base_lo, mtr_base_hi;
1890     u32 so_base_lo, so_base_hi;
1891     u32 gic_base_lo, gic_base_hi;
1892 
1893     mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1894     mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1895     so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1896     so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1897 
1898     gic_base_lo =
1899         lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1900     gic_base_hi =
1901         upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1902 
1903     WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1904     WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1905     WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1906     WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1907 
1908     /* CMDQ CQ has 20 cache lines */
1909     WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);
1910 
1911     WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
1912     WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);
1913 
1914     WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);
1915 
1916     WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);
1917 
1918     WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);
1919 
1920     WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
1921 }
1922 
1923 void goya_init_mme_qmans(struct hl_device *hdev)
1924 {
1925     struct goya_device *goya = hdev->asic_specific;
1926     u32 so_base_lo, so_base_hi;
1927 
1928     if (goya->hw_cap_initialized & HW_CAP_MME)
1929         return;
1930 
1931     so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1932     so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1933 
1934     WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
1935     WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);
1936 
1937     goya_init_mme_qman(hdev);
1938     goya_init_mme_cmdq(hdev);
1939 
1940     goya->hw_cap_initialized |= HW_CAP_MME;
1941 }
1942 
1943 static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
1944 {
1945     u32 mtr_base_lo, mtr_base_hi;
1946     u32 so_base_lo, so_base_hi;
1947     u32 gic_base_lo, gic_base_hi;
1948     u64 qman_base_addr;
1949     u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);
1950 
1951     mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1952     mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1953     so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1954     so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1955 
1956     gic_base_lo =
1957         lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1958     gic_base_hi =
1959         upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1960 
1961     qman_base_addr = hdev->asic_prop.sram_base_address + base_off;
1962 
1963     WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
1964     WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
1965     WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
1966     WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
1967     WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
1968     WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
1969     WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
1970     WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
1971     WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);
1972 
1973     WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1974     WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1975     WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1976     WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1977 
1978     WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);
1979 
1980     WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1981     WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1982 
1983     WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
1984             GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);
1985 
1986     WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);
1987 
1988     WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);
1989 
1990     WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
1991 }
1992 
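/*
 * Note on the reg_off computation above: the per-TPC QMAN register blocks
 * are laid out at a fixed stride, so the delta between the same register
 * in two adjacent instances (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI) gives the
 * per-instance stride. The same idiom as a helper, for illustration only:
 */
static inline u32 goya_tpc_qm_reg(u32 tpc0_reg, int tpc_id)
{
    return tpc0_reg + tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);
}

/* e.g. goya_tpc_qm_reg(mmTPC0_QM_PQ_PI, 3) == mmTPC3_QM_PQ_PI */
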
1993 static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
1994 {
1995     u32 mtr_base_lo, mtr_base_hi;
1996     u32 so_base_lo, so_base_hi;
1997     u32 gic_base_lo, gic_base_hi;
1998     u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);
1999 
2000     mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
2001     mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
2002     so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
2003     so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
2004 
2005     gic_base_lo =
2006         lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
2007     gic_base_hi =
2008         upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
2009 
2010     WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
2011     WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
2012     WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
2013     WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
2014 
2015     WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);
2016 
2017     WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
2018     WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
2019 
2020     WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
2021             GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);
2022 
2023     WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);
2024 
2025     WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);
2026 
2027     WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
2028 }
2029 
2030 void goya_init_tpc_qmans(struct hl_device *hdev)
2031 {
2032     struct goya_device *goya = hdev->asic_specific;
2033     u32 so_base_lo, so_base_hi;
2034     u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW -
2035             mmTPC0_CFG_SM_BASE_ADDRESS_LOW;
2036     int i;
2037 
2038     if (goya->hw_cap_initialized & HW_CAP_TPC)
2039         return;
2040 
2041     so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
2042     so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
2043 
2044     for (i = 0 ; i < TPC_MAX_NUM ; i++) {
2045         WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off,
2046                 so_base_lo);
2047         WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off,
2048                 so_base_hi);
2049     }
2050 
2051     goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0);
2052     goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1);
2053     goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2);
2054     goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3);
2055     goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4);
2056     goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5);
2057     goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6);
2058     goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7);
2059 
2060     for (i = 0 ; i < TPC_MAX_NUM ; i++)
2061         goya_init_tpc_cmdq(hdev, i);
2062 
2063     goya->hw_cap_initialized |= HW_CAP_TPC;
2064 }
2065 
2066 /*
2067  * goya_disable_internal_queues - Disable internal queues
2068  *
2069  * @hdev: pointer to hl_device structure
2070  *
2071  */
2072 static void goya_disable_internal_queues(struct hl_device *hdev)
2073 {
2074     struct goya_device *goya = hdev->asic_specific;
2075 
2076     if (!(goya->hw_cap_initialized & HW_CAP_MME))
2077         goto disable_tpc;
2078 
2079     WREG32(mmMME_QM_GLBL_CFG0, 0);
2080     WREG32(mmMME_CMDQ_GLBL_CFG0, 0);
2081 
2082 disable_tpc:
2083     if (!(goya->hw_cap_initialized & HW_CAP_TPC))
2084         return;
2085 
2086     WREG32(mmTPC0_QM_GLBL_CFG0, 0);
2087     WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);
2088 
2089     WREG32(mmTPC1_QM_GLBL_CFG0, 0);
2090     WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0);
2091 
2092     WREG32(mmTPC2_QM_GLBL_CFG0, 0);
2093     WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0);
2094 
2095     WREG32(mmTPC3_QM_GLBL_CFG0, 0);
2096     WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0);
2097 
2098     WREG32(mmTPC4_QM_GLBL_CFG0, 0);
2099     WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0);
2100 
2101     WREG32(mmTPC5_QM_GLBL_CFG0, 0);
2102     WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0);
2103 
2104     WREG32(mmTPC6_QM_GLBL_CFG0, 0);
2105     WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0);
2106 
2107     WREG32(mmTPC7_QM_GLBL_CFG0, 0);
2108     WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0);
2109 }
2110 
2111 /*
2112  * goya_stop_internal_queues - Stop internal queues
2113  *
2114  * @hdev: pointer to hl_device structure
2115  *
2116  * Returns 0 on success
2117  *
2118  */
2119 static int goya_stop_internal_queues(struct hl_device *hdev)
2120 {
2121     struct goya_device *goya = hdev->asic_specific;
2122     int rc, retval = 0;
2123 
2124     if (!(goya->hw_cap_initialized & HW_CAP_MME))
2125         goto stop_tpc;
2126 
2127     /*
2128      * Each queue (QMAN) is a separate H/W logic, so each QMAN can be
2129      * stopped independently; a failure to stop one QMAN does not
2130      * prevent us from trying to stop the others
2131      */
2132 
2133     rc = goya_stop_queue(hdev,
2134             mmMME_QM_GLBL_CFG1,
2135             mmMME_QM_CP_STS,
2136             mmMME_QM_GLBL_STS0);
2137 
2138     if (rc) {
2139         dev_err(hdev->dev, "failed to stop MME QMAN\n");
2140         retval = -EIO;
2141     }
2142 
2143     rc = goya_stop_queue(hdev,
2144             mmMME_CMDQ_GLBL_CFG1,
2145             mmMME_CMDQ_CP_STS,
2146             mmMME_CMDQ_GLBL_STS0);
2147 
2148     if (rc) {
2149         dev_err(hdev->dev, "failed to stop MME CMDQ\n");
2150         retval = -EIO;
2151     }
2152 
2153 stop_tpc:
2154     if (!(goya->hw_cap_initialized & HW_CAP_TPC))
2155         return retval;
2156 
2157     rc = goya_stop_queue(hdev,
2158             mmTPC0_QM_GLBL_CFG1,
2159             mmTPC0_QM_CP_STS,
2160             mmTPC0_QM_GLBL_STS0);
2161 
2162     if (rc) {
2163         dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n");
2164         retval = -EIO;
2165     }
2166 
2167     rc = goya_stop_queue(hdev,
2168             mmTPC0_CMDQ_GLBL_CFG1,
2169             mmTPC0_CMDQ_CP_STS,
2170             mmTPC0_CMDQ_GLBL_STS0);
2171 
2172     if (rc) {
2173         dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n");
2174         retval = -EIO;
2175     }
2176 
2177     rc = goya_stop_queue(hdev,
2178             mmTPC1_QM_GLBL_CFG1,
2179             mmTPC1_QM_CP_STS,
2180             mmTPC1_QM_GLBL_STS0);
2181 
2182     if (rc) {
2183         dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n");
2184         retval = -EIO;
2185     }
2186 
2187     rc = goya_stop_queue(hdev,
2188             mmTPC1_CMDQ_GLBL_CFG1,
2189             mmTPC1_CMDQ_CP_STS,
2190             mmTPC1_CMDQ_GLBL_STS0);
2191 
2192     if (rc) {
2193         dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n");
2194         retval = -EIO;
2195     }
2196 
2197     rc = goya_stop_queue(hdev,
2198             mmTPC2_QM_GLBL_CFG1,
2199             mmTPC2_QM_CP_STS,
2200             mmTPC2_QM_GLBL_STS0);
2201 
2202     if (rc) {
2203         dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n");
2204         retval = -EIO;
2205     }
2206 
2207     rc = goya_stop_queue(hdev,
2208             mmTPC2_CMDQ_GLBL_CFG1,
2209             mmTPC2_CMDQ_CP_STS,
2210             mmTPC2_CMDQ_GLBL_STS0);
2211 
2212     if (rc) {
2213         dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n");
2214         retval = -EIO;
2215     }
2216 
2217     rc = goya_stop_queue(hdev,
2218             mmTPC3_QM_GLBL_CFG1,
2219             mmTPC3_QM_CP_STS,
2220             mmTPC3_QM_GLBL_STS0);
2221 
2222     if (rc) {
2223         dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n");
2224         retval = -EIO;
2225     }
2226 
2227     rc = goya_stop_queue(hdev,
2228             mmTPC3_CMDQ_GLBL_CFG1,
2229             mmTPC3_CMDQ_CP_STS,
2230             mmTPC3_CMDQ_GLBL_STS0);
2231 
2232     if (rc) {
2233         dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n");
2234         retval = -EIO;
2235     }
2236 
2237     rc = goya_stop_queue(hdev,
2238             mmTPC4_QM_GLBL_CFG1,
2239             mmTPC4_QM_CP_STS,
2240             mmTPC4_QM_GLBL_STS0);
2241 
2242     if (rc) {
2243         dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n");
2244         retval = -EIO;
2245     }
2246 
2247     rc = goya_stop_queue(hdev,
2248             mmTPC4_CMDQ_GLBL_CFG1,
2249             mmTPC4_CMDQ_CP_STS,
2250             mmTPC4_CMDQ_GLBL_STS0);
2251 
2252     if (rc) {
2253         dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n");
2254         retval = -EIO;
2255     }
2256 
2257     rc = goya_stop_queue(hdev,
2258             mmTPC5_QM_GLBL_CFG1,
2259             mmTPC5_QM_CP_STS,
2260             mmTPC5_QM_GLBL_STS0);
2261 
2262     if (rc) {
2263         dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n");
2264         retval = -EIO;
2265     }
2266 
2267     rc = goya_stop_queue(hdev,
2268             mmTPC5_CMDQ_GLBL_CFG1,
2269             mmTPC5_CMDQ_CP_STS,
2270             mmTPC5_CMDQ_GLBL_STS0);
2271 
2272     if (rc) {
2273         dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n");
2274         retval = -EIO;
2275     }
2276 
2277     rc = goya_stop_queue(hdev,
2278             mmTPC6_QM_GLBL_CFG1,
2279             mmTPC6_QM_CP_STS,
2280             mmTPC6_QM_GLBL_STS0);
2281 
2282     if (rc) {
2283         dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n");
2284         retval = -EIO;
2285     }
2286 
2287     rc = goya_stop_queue(hdev,
2288             mmTPC6_CMDQ_GLBL_CFG1,
2289             mmTPC6_CMDQ_CP_STS,
2290             mmTPC6_CMDQ_GLBL_STS0);
2291 
2292     if (rc) {
2293         dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n");
2294         retval = -EIO;
2295     }
2296 
2297     rc = goya_stop_queue(hdev,
2298             mmTPC7_QM_GLBL_CFG1,
2299             mmTPC7_QM_CP_STS,
2300             mmTPC7_QM_GLBL_STS0);
2301 
2302     if (rc) {
2303         dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n");
2304         retval = -EIO;
2305     }
2306 
2307     rc = goya_stop_queue(hdev,
2308             mmTPC7_CMDQ_GLBL_CFG1,
2309             mmTPC7_CMDQ_CP_STS,
2310             mmTPC7_CMDQ_GLBL_STS0);
2311 
2312     if (rc) {
2313         dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n");
2314         retval = -EIO;
2315     }
2316 
2317     return retval;
2318 }
2319 
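/*
 * Because each QMAN is stopped independently (see the comment at the top
 * of goya_stop_internal_queues()), the sixteen TPC stop sequences above
 * collapse into a loop. A sketch, assuming the TPC QM and CMDQ register
 * blocks sit at a uniform per-TPC stride (as the reg_off computations
 * elsewhere in this file suggest); this is an illustration, not the
 * driver's code:
 */
static int goya_stop_tpc_queues_sketch(struct hl_device *hdev)
{
    u32 qm_off = mmTPC1_QM_GLBL_CFG1 - mmTPC0_QM_GLBL_CFG1;
    u32 cmdq_off = mmTPC1_CMDQ_GLBL_CFG1 - mmTPC0_CMDQ_GLBL_CFG1;
    int i, rc, retval = 0;

    for (i = 0 ; i < TPC_MAX_NUM ; i++) {
        rc = goya_stop_queue(hdev,
                mmTPC0_QM_GLBL_CFG1 + i * qm_off,
                mmTPC0_QM_CP_STS + i * qm_off,
                mmTPC0_QM_GLBL_STS0 + i * qm_off);
        if (rc) {
            dev_err(hdev->dev, "failed to stop TPC %d QMAN\n", i);
            retval = -EIO;    /* keep trying to stop the rest */
        }

        rc = goya_stop_queue(hdev,
                mmTPC0_CMDQ_GLBL_CFG1 + i * cmdq_off,
                mmTPC0_CMDQ_CP_STS + i * cmdq_off,
                mmTPC0_CMDQ_GLBL_STS0 + i * cmdq_off);
        if (rc) {
            dev_err(hdev->dev, "failed to stop TPC %d CMDQ\n", i);
            retval = -EIO;
        }
    }

    return retval;
}
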
2320 static void goya_dma_stall(struct hl_device *hdev)
2321 {
2322     struct goya_device *goya = hdev->asic_specific;
2323 
2324     if (!(goya->hw_cap_initialized & HW_CAP_DMA))
2325         return;
2326 
2327     WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
2328     WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
2329     WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
2330     WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
2331     WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
2332 }
2333 
2334 static void goya_tpc_stall(struct hl_device *hdev)
2335 {
2336     struct goya_device *goya = hdev->asic_specific;
2337 
2338     if (!(goya->hw_cap_initialized & HW_CAP_TPC))
2339         return;
2340 
2341     WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2342     WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
2343     WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
2344     WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
2345     WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
2346     WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
2347     WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
2348     WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
2349 }
2350 
2351 static void goya_mme_stall(struct hl_device *hdev)
2352 {
2353     struct goya_device *goya = hdev->asic_specific;
2354 
2355     if (!(goya->hw_cap_initialized & HW_CAP_MME))
2356         return;
2357 
2358     WREG32(mmMME_STALL, 0xFFFFFFFF);
2359 }
2360 
2361 static int goya_enable_msix(struct hl_device *hdev)
2362 {
2363     struct goya_device *goya = hdev->asic_specific;
2364     int cq_cnt = hdev->asic_prop.completion_queues_count;
2365     int rc, i, irq_cnt_init, irq;
2366 
2367     if (goya->hw_cap_initialized & HW_CAP_MSIX)
2368         return 0;
2369 
2370     rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
2371                 GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
2372     if (rc < 0) {
2373         dev_err(hdev->dev,
2374             "MSI-X: Failed to enable support -- %d/%d\n",
2375             GOYA_MSIX_ENTRIES, rc);
2376         return rc;
2377     }
2378 
2379     for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2380         irq = pci_irq_vector(hdev->pdev, i);
2381         rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
2382                 &hdev->completion_queue[i]);
2383         if (rc) {
2384             dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2385             goto free_irqs;
2386         }
2387     }
2388 
2389     irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2390 
2391     rc = request_irq(irq, hl_irq_handler_eq, 0,
2392             goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
2393             &hdev->event_queue);
2394     if (rc) {
2395         dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2396         goto free_irqs;
2397     }
2398 
2399     goya->hw_cap_initialized |= HW_CAP_MSIX;
2400     return 0;
2401 
2402 free_irqs:
2403     for (i = 0 ; i < irq_cnt_init ; i++)
2404         free_irq(pci_irq_vector(hdev->pdev, i),
2405             &hdev->completion_queue[i]);
2406 
2407     pci_free_irq_vectors(hdev->pdev);
2408     return rc;
2409 }
2410 
2411 static void goya_sync_irqs(struct hl_device *hdev)
2412 {
2413     struct goya_device *goya = hdev->asic_specific;
2414     int i;
2415 
2416     if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2417         return;
2418 
2419     /* Wait for all pending IRQ handlers to finish */
2420     for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
2421         synchronize_irq(pci_irq_vector(hdev->pdev, i));
2422 
2423     synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX));
2424 }
2425 
2426 static void goya_disable_msix(struct hl_device *hdev)
2427 {
2428     struct goya_device *goya = hdev->asic_specific;
2429     int i, irq;
2430 
2431     if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2432         return;
2433 
2434     goya_sync_irqs(hdev);
2435 
2436     irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2437     free_irq(irq, &hdev->event_queue);
2438 
2439     for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
2440         irq = pci_irq_vector(hdev->pdev, i);
2441         free_irq(irq, &hdev->completion_queue[i]);
2442     }
2443 
2444     pci_free_irq_vectors(hdev->pdev);
2445 
2446     goya->hw_cap_initialized &= ~HW_CAP_MSIX;
2447 }
2448 
2449 static void goya_enable_timestamp(struct hl_device *hdev)
2450 {
2451     /* Disable the timestamp counter */
2452     WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2453 
2454     /* Zero the lower/upper parts of the 64-bit counter */
2455     WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2456     WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2457 
2458     /* Enable the counter */
2459     WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2460 }
2461 
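/*
 * Reading the counter back is the classic two-half problem: a 64-bit
 * value read as two 32-bit words can tear if the low word wraps between
 * the reads. A sketch of the usual hi/lo/hi retry idiom, assuming the
 * words live at the +0x8/+0xC offsets zeroed above (which offset holds
 * the low word and which the high is an assumption here):
 */
static u64 goya_read_timestamp_sketch(struct hl_device *hdev)
{
    u32 hi, lo, hi2;

    do {
        hi = RREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC);
        lo = RREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8);
        hi2 = RREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC);
    } while (hi != hi2);    /* retry if the high word changed mid-read */

    return ((u64) hi << 32) | lo;
}
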
2462 static void goya_disable_timestamp(struct hl_device *hdev)
2463 {
2464     /* Disable the timestamp counter */
2465     WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2466 }
2467 
2468 static void goya_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
2469 {
2470     u32 wait_timeout_ms;
2471 
2472     if (hdev->pldm)
2473         wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2474     else
2475         wait_timeout_ms = GOYA_RESET_WAIT_MSEC;
2476 
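    /* Halt order: stop the queues, let traffic drain, stall the engines, then disable the queues */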
2477     goya_stop_external_queues(hdev);
2478     goya_stop_internal_queues(hdev);
2479 
2480     msleep(wait_timeout_ms);
2481 
2482     goya_dma_stall(hdev);
2483     goya_tpc_stall(hdev);
2484     goya_mme_stall(hdev);
2485 
2486     msleep(wait_timeout_ms);
2487 
2488     goya_disable_external_queues(hdev);
2489     goya_disable_internal_queues(hdev);
2490 
2491     goya_disable_timestamp(hdev);
2492 
2493     if (hard_reset) {
2494         goya_disable_msix(hdev);
2495         goya_mmu_remove_device_cpu_mappings(hdev);
2496     } else {
2497         goya_sync_irqs(hdev);
2498     }
2499 }
2500 
2501 /*
2502  * goya_load_firmware_to_device() - Load LINUX FW code to device.
2503  * @hdev: Pointer to hl_device structure.
2504  *
2505  * Copy LINUX fw code from the firmware file to the DDR BAR.
2506  *
2507  * Return: 0 on success, non-zero for failure.
2508  */
2509 static int goya_load_firmware_to_device(struct hl_device *hdev)
2510 {
2511     void __iomem *dst;
2512 
2513     dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
2514 
2515     return hl_fw_load_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst, 0, 0);
2516 }
2517 
2518 /*
2519  * goya_load_boot_fit_to_device() - Load boot fit to device.
2520  * @hdev: Pointer to hl_device structure.
2521  *
2522  * Copy boot fit file to SRAM BAR.
2523  *
2524  * Return: 0 on success, non-zero for failure.
2525  */
2526 static int goya_load_boot_fit_to_device(struct hl_device *hdev)
2527 {
2528     void __iomem *dst;
2529 
2530     dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2531 
2532     return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst, 0, 0);
2533 }
2534 
2535 static void goya_init_dynamic_firmware_loader(struct hl_device *hdev)
2536 {
2537     struct dynamic_fw_load_mgr *dynamic_loader;
2538     struct cpu_dyn_regs *dyn_regs;
2539 
2540     dynamic_loader = &hdev->fw_loader.dynamic_loader;
2541 
2542     /*
2543      * Here we set initial values for a few specific dynamic regs (before
2544      * the first descriptor is read from the FW, these values have to be
2545      * hard-coded). In later stages of the protocol these values are
2546      * updated automatically by reading the FW descriptor, so the data
2547      * there is always up-to-date
2548      */
2549     dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
2550     dyn_regs->kmd_msg_to_cpu =
2551                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
2552     dyn_regs->cpu_cmd_status_to_host =
2553                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
2554 
2555     dynamic_loader->wait_for_bl_timeout = GOYA_WAIT_FOR_BL_TIMEOUT_USEC;
2556 }
2557 
2558 static void goya_init_static_firmware_loader(struct hl_device *hdev)
2559 {
2560     struct static_fw_load_mgr *static_loader;
2561 
2562     static_loader = &hdev->fw_loader.static_loader;
2563 
2564     static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
2565     static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
2566     static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
2567     static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
2568     static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
2569     static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
2570     static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
2571     static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
2572     static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
2573     static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
2574     static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
2575     static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
2576 }
2577 
2578 static void goya_init_firmware_preload_params(struct hl_device *hdev)
2579 {
2580     struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
2581 
2582     pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
2583     pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
2584     pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
2585     pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
2586     pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
2587     pre_fw_load->wait_for_preboot_timeout = GOYA_BOOT_FIT_REQ_TIMEOUT_USEC;
2588 }
2589 
2590 static void goya_init_firmware_loader(struct hl_device *hdev)
2591 {
2592     struct asic_fixed_properties *prop = &hdev->asic_prop;
2593     struct fw_load_mgr *fw_loader = &hdev->fw_loader;
2594 
2595     /* fill common fields */
2596     fw_loader->fw_comp_loaded = FW_TYPE_NONE;
2597     fw_loader->boot_fit_img.image_name = GOYA_BOOT_FIT_FILE;
2598     fw_loader->linux_img.image_name = GOYA_LINUX_FW_FILE;
2599     fw_loader->cpu_timeout = GOYA_CPU_TIMEOUT_USEC;
2600     fw_loader->boot_fit_timeout = GOYA_BOOT_FIT_REQ_TIMEOUT_USEC;
2601     fw_loader->skip_bmc = false;
2602     fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
2603     fw_loader->dram_bar_id = DDR_BAR_ID;
2604 
2605     if (prop->dynamic_fw_load)
2606         goya_init_dynamic_firmware_loader(hdev);
2607     else
2608         goya_init_static_firmware_loader(hdev);
2609 }
2610 
2611 static int goya_init_cpu(struct hl_device *hdev)
2612 {
2613     struct goya_device *goya = hdev->asic_specific;
2614     int rc;
2615 
2616     if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
2617         return 0;
2618 
2619     if (goya->hw_cap_initialized & HW_CAP_CPU)
2620         return 0;
2621 
2622     /*
2623      * Before pushing u-boot/Linux to the device, we need to set the DDR
2624      * BAR to the base address of the DRAM
2625      */
2626     if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
2627         dev_err(hdev->dev,
2628             "failed to map DDR bar to DRAM base address\n");
2629         return -EIO;
2630     }
2631 
2632     rc = hl_fw_init_cpu(hdev);
2633 
2634     if (rc)
2635         return rc;
2636 
2637     goya->hw_cap_initialized |= HW_CAP_CPU;
2638 
2639     return 0;
2640 }
2641 
2642 static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
2643                         u64 phys_addr)
2644 {
2645     u32 status, timeout_usec;
2646     int rc;
2647 
2648     if (hdev->pldm)
2649         timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
2650     else
2651         timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
2652 
2653     WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
2654     WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
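    /* Bit 31 acts as a busy/kick bit; the poll below waits for the H/W to clear it */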
2655     WREG32(MMU_ASID_BUSY, 0x80000000 | asid);
2656 
2657     rc = hl_poll_timeout(
2658         hdev,
2659         MMU_ASID_BUSY,
2660         status,
2661         !(status & 0x80000000),
2662         1000,
2663         timeout_usec);
2664 
2665     if (rc) {
2666         dev_err(hdev->dev,
2667             "Timeout during MMU hop0 config of asid %d\n", asid);
2668         return rc;
2669     }
2670 
2671     return 0;
2672 }
2673 
2674 int goya_mmu_init(struct hl_device *hdev)
2675 {
2676     struct asic_fixed_properties *prop = &hdev->asic_prop;
2677     struct goya_device *goya = hdev->asic_specific;
2678     u64 hop0_addr;
2679     int rc, i;
2680 
2681     if (!hdev->mmu_enable)
2682         return 0;
2683 
2684     if (goya->hw_cap_initialized & HW_CAP_MMU)
2685         return 0;
2686 
2687     hdev->dram_default_page_mapping = true;
2688 
2689     for (i = 0 ; i < prop->max_asid ; i++) {
2690         hop0_addr = prop->mmu_pgt_addr +
2691                 (i * prop->mmu_hop_table_size);
2692 
2693         rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2694         if (rc) {
2695             dev_err(hdev->dev,
2696                 "failed to set hop0 addr for asid %d\n", i);
2697             goto err;
2698         }
2699     }
2700 
2701     goya->hw_cap_initialized |= HW_CAP_MMU;
2702 
2703     /* init MMU cache manage page */
2704     WREG32(mmSTLB_CACHE_INV_BASE_39_8,
2705                 lower_32_bits(MMU_CACHE_MNG_ADDR >> 8));
2706     WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2707 
2708     /* Remove follower feature due to performance bug */
2709     WREG32_AND(mmSTLB_STLB_FEATURE_EN,
2710             (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
2711 
2712     hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR | MMU_OP_PHYS_PACK);
2713 
2714     WREG32(mmMMU_MMU_ENABLE, 1);
2715     WREG32(mmMMU_SPI_MASK, 0xF);
2716 
2717     return 0;
2718 
2719 err:
2720     return rc;
2721 }
2722 
2723 /*
2724  * goya_hw_init - Goya hardware initialization code
2725  *
2726  * @hdev: pointer to hl_device structure
2727  *
2728  * Returns 0 on success
2729  *
2730  */
2731 static int goya_hw_init(struct hl_device *hdev)
2732 {
2733     struct asic_fixed_properties *prop = &hdev->asic_prop;
2734     int rc;
2735 
2736     /* Perform read from the device to make sure device is up */
2737     RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2738 
2739     /*
2740      * Let's mark in the H/W that we have reached this point. We check
2741      * this value in the reset_before_init function to understand whether
2742      * we need to reset the chip before doing H/W init. This register is
2743      * cleared by the H/W upon H/W reset
2744      */
2745     WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2746 
2747     rc = goya_init_cpu(hdev);
2748     if (rc) {
2749         dev_err(hdev->dev, "failed to initialize CPU\n");
2750         return rc;
2751     }
2752 
2753     goya_tpc_mbist_workaround(hdev);
2754 
2755     goya_init_golden_registers(hdev);
2756 
2757     /*
2758      * After CPU initialization is finished, change DDR bar mapping inside
2759      * iATU to point to the start address of the MMU page tables
2760      */
2761     if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
2762             ~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
2763         dev_err(hdev->dev,
2764             "failed to map DDR bar to MMU page tables\n");
2765         return -EIO;
2766     }
2767 
2768     rc = goya_mmu_init(hdev);
2769     if (rc)
2770         return rc;
2771 
2772     goya_init_security(hdev);
2773 
2774     goya_init_dma_qmans(hdev);
2775 
2776     goya_init_mme_qmans(hdev);
2777 
2778     goya_init_tpc_qmans(hdev);
2779 
2780     goya_enable_timestamp(hdev);
2781 
2782     /* MSI-X must be enabled before CPU queues are initialized */
2783     rc = goya_enable_msix(hdev);
2784     if (rc)
2785         goto disable_queues;
2786 
2787     /* Perform read from the device to flush all MSI-X configuration */
2788     RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2789 
2790     return 0;
2791 
2792 disable_queues:
2793     goya_disable_internal_queues(hdev);
2794     goya_disable_external_queues(hdev);
2795 
2796     return rc;
2797 }
2798 
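/*
 * The HW_STATE write at the top of goya_hw_init() is what enables the
 * reset-before-init decision described there. The actual check lives
 * elsewhere in the driver; an illustrative form, using only the register
 * and value written above, would be:
 */
static bool goya_hw_state_is_dirty_sketch(struct hl_device *hdev)
{
    /* DIRTY means a previous H/W init was not followed by a H/W reset */
    return RREG32(mmHW_STATE) == HL_DEVICE_HW_STATE_DIRTY;
}
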
2799 static void goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
2800 {
2801     struct goya_device *goya = hdev->asic_specific;
2802     u32 reset_timeout_ms, cpu_timeout_ms, status;
2803 
2804     if (hdev->pldm) {
2805         reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
2806         cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2807     } else {
2808         reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
2809         cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
2810     }
2811 
2812     if (hard_reset) {
2813         /* We don't know what state the CPU is in, so make sure it is
2814          * stopped by any means necessary
2815          */
2816         WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
2817         WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2818             GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
2819 
2820         msleep(cpu_timeout_ms);
2821 
2822         goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
2823         goya_disable_clk_rlx(hdev);
2824         goya_set_pll_refclk(hdev);
2825 
2826         WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
2827         dev_dbg(hdev->dev,
2828             "Issued HARD reset command, going to wait %dms\n",
2829             reset_timeout_ms);
2830     } else {
2831         WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
2832         dev_dbg(hdev->dev,
2833             "Issued SOFT reset command, going to wait %dms\n",
2834             reset_timeout_ms);
2835     }
2836 
2837     /*
2838      * After a hard reset we can't poll the BTM_FSM register because the
2839      * PSOC itself is in reset. In either type of reset we need to wait
2840      * until the reset is deasserted
2841      */
2842     msleep(reset_timeout_ms);
2843 
2844     status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
2845     if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
2846         dev_err(hdev->dev,
2847             "Timeout while waiting for device to reset 0x%x\n",
2848             status);
2849 
2850     if (!hard_reset && goya) {
2851         goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
2852                         HW_CAP_GOLDEN | HW_CAP_TPC);
2853         WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2854                 GOYA_ASYNC_EVENT_ID_SOFT_RESET);
2855         return;
2856     }
2857 
2858     /* Chicken bit to re-initiate boot sequencer flow */
2859     WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START,
2860         1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT);
2861     /* Move boot manager FSM to pre boot sequencer init state */
2862     WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
2863             0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);
2864 
2865     if (goya) {
2866         goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
2867                 HW_CAP_DDR_0 | HW_CAP_DDR_1 |
2868                 HW_CAP_DMA | HW_CAP_MME |
2869                 HW_CAP_MMU | HW_CAP_TPC_MBIST |
2870                 HW_CAP_GOLDEN | HW_CAP_TPC);
2871 
2872         memset(goya->events_stat, 0, sizeof(goya->events_stat));
2873     }
2874 }
2875 
2876 int goya_suspend(struct hl_device *hdev)
2877 {
2878     int rc;
2879 
2880     rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
2881     if (rc)
2882         dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
2883 
2884     return rc;
2885 }
2886 
2887 int goya_resume(struct hl_device *hdev)
2888 {
2889     return goya_init_iatu(hdev);
2890 }
2891 
2892 static int goya_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
2893             void *cpu_addr, dma_addr_t dma_addr, size_t size)
2894 {
2895     int rc;
2896 
2897     vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
2898             VM_DONTCOPY | VM_NORESERVE;
2899 
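    /* Undo the HOST_PHYS_BASE shift that goya_dma_alloc_coherent() (below) applies to DMA handles */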
2900     rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
2901                 (dma_addr - HOST_PHYS_BASE), size);
2902     if (rc)
2903         dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);
2904 
2905     return rc;
2906 }
2907 
2908 void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
2909 {
2910     u32 db_reg_offset, db_value;
2911 
2912     switch (hw_queue_id) {
2913     case GOYA_QUEUE_ID_DMA_0:
2914         db_reg_offset = mmDMA_QM_0_PQ_PI;
2915         break;
2916 
2917     case GOYA_QUEUE_ID_DMA_1:
2918         db_reg_offset = mmDMA_QM_1_PQ_PI;
2919         break;
2920 
2921     case GOYA_QUEUE_ID_DMA_2:
2922         db_reg_offset = mmDMA_QM_2_PQ_PI;
2923         break;
2924 
2925     case GOYA_QUEUE_ID_DMA_3:
2926         db_reg_offset = mmDMA_QM_3_PQ_PI;
2927         break;
2928 
2929     case GOYA_QUEUE_ID_DMA_4:
2930         db_reg_offset = mmDMA_QM_4_PQ_PI;
2931         break;
2932 
2933     case GOYA_QUEUE_ID_CPU_PQ:
2934         db_reg_offset = mmCPU_IF_PF_PQ_PI;
2935         break;
2936 
2937     case GOYA_QUEUE_ID_MME:
2938         db_reg_offset = mmMME_QM_PQ_PI;
2939         break;
2940 
2941     case GOYA_QUEUE_ID_TPC0:
2942         db_reg_offset = mmTPC0_QM_PQ_PI;
2943         break;
2944 
2945     case GOYA_QUEUE_ID_TPC1:
2946         db_reg_offset = mmTPC1_QM_PQ_PI;
2947         break;
2948 
2949     case GOYA_QUEUE_ID_TPC2:
2950         db_reg_offset = mmTPC2_QM_PQ_PI;
2951         break;
2952 
2953     case GOYA_QUEUE_ID_TPC3:
2954         db_reg_offset = mmTPC3_QM_PQ_PI;
2955         break;
2956 
2957     case GOYA_QUEUE_ID_TPC4:
2958         db_reg_offset = mmTPC4_QM_PQ_PI;
2959         break;
2960 
2961     case GOYA_QUEUE_ID_TPC5:
2962         db_reg_offset = mmTPC5_QM_PQ_PI;
2963         break;
2964 
2965     case GOYA_QUEUE_ID_TPC6:
2966         db_reg_offset = mmTPC6_QM_PQ_PI;
2967         break;
2968 
2969     case GOYA_QUEUE_ID_TPC7:
2970         db_reg_offset = mmTPC7_QM_PQ_PI;
2971         break;
2972 
2973     default:
2974         /* Should never get here */
2975         dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
2976             hw_queue_id);
2977         return;
2978     }
2979 
2980     db_value = pi;
2981 
2982     /* ring the doorbell */
2983     WREG32(db_reg_offset, db_value);
2984 
2985     if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ) {
2986         /* make sure device CPU will read latest data from host */
2987         mb();
2988         WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2989                 GOYA_ASYNC_EVENT_ID_PI_UPDATE);
2990     }
2991 }
2992 
2993 void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
2994 {
2995     /* The QMAN PQs are on the SRAM, so we need to copy to I/O space */
2996     memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
2997 }
2998 
2999 static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3000                     dma_addr_t *dma_handle, gfp_t flags)
3001 {
3002     void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3003                         dma_handle, flags);
3004 
3005     /* Shift to the device's base physical address of host memory */
3006     if (kernel_addr)
3007         *dma_handle += HOST_PHYS_BASE;
3008 
3009     return kernel_addr;
3010 }
3011 
3012 static void goya_dma_free_coherent(struct hl_device *hdev, size_t size,
3013                     void *cpu_addr, dma_addr_t dma_handle)
3014 {
3015     /* Cancel the device's base physical address of host memory */
3016     dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3017 
3018     dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3019 }
3020 
3021 int goya_scrub_device_mem(struct hl_device *hdev)
3022 {
3023     return 0;
3024 }
3025 
3026 void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
3027                 dma_addr_t *dma_handle, u16 *queue_len)
3028 {
3029     void *base;
3030     u32 offset;
3031 
3032     *dma_handle = hdev->asic_prop.sram_base_address;
3033 
3034     base = (__force void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];
3035 
3036     switch (queue_id) {
3037     case GOYA_QUEUE_ID_MME:
3038         offset = MME_QMAN_BASE_OFFSET;
3039         *queue_len = MME_QMAN_LENGTH;
3040         break;
3041     case GOYA_QUEUE_ID_TPC0:
3042         offset = TPC0_QMAN_BASE_OFFSET;
3043         *queue_len = TPC_QMAN_LENGTH;
3044         break;
3045     case GOYA_QUEUE_ID_TPC1:
3046         offset = TPC1_QMAN_BASE_OFFSET;
3047         *queue_len = TPC_QMAN_LENGTH;
3048         break;
3049     case GOYA_QUEUE_ID_TPC2:
3050         offset = TPC2_QMAN_BASE_OFFSET;
3051         *queue_len = TPC_QMAN_LENGTH;
3052         break;
3053     case GOYA_QUEUE_ID_TPC3:
3054         offset = TPC3_QMAN_BASE_OFFSET;
3055         *queue_len = TPC_QMAN_LENGTH;
3056         break;
3057     case GOYA_QUEUE_ID_TPC4:
3058         offset = TPC4_QMAN_BASE_OFFSET;
3059         *queue_len = TPC_QMAN_LENGTH;
3060         break;
3061     case GOYA_QUEUE_ID_TPC5:
3062         offset = TPC5_QMAN_BASE_OFFSET;
3063         *queue_len = TPC_QMAN_LENGTH;
3064         break;
3065     case GOYA_QUEUE_ID_TPC6:
3066         offset = TPC6_QMAN_BASE_OFFSET;
3067         *queue_len = TPC_QMAN_LENGTH;
3068         break;
3069     case GOYA_QUEUE_ID_TPC7:
3070         offset = TPC7_QMAN_BASE_OFFSET;
3071         *queue_len = TPC_QMAN_LENGTH;
3072         break;
3073     default:
3074         dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3075         return NULL;
3076     }
3077 
3078     base += offset;
3079     *dma_handle += offset;
3080 
3081     return base;
3082 }
3083 
3084 static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
3085 {
3086     struct packet_msg_prot *fence_pkt;
3087     u32 *fence_ptr;
3088     dma_addr_t fence_dma_addr;
3089     struct hl_cb *cb;
3090     u32 tmp, timeout;
3091     int rc;
3092 
3093     if (hdev->pldm)
3094         timeout = GOYA_PLDM_QMAN0_TIMEOUT_USEC;
3095     else
3096         timeout = HL_DEVICE_TIMEOUT_USEC;
3097 
3098     if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
3099         dev_err_ratelimited(hdev->dev,
3100             "Can't send driver job on QMAN0 because the device is not idle\n");
3101         return -EBUSY;
3102     }
3103 
3104     fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
3105     if (!fence_ptr) {
3106         dev_err(hdev->dev,
3107             "Failed to allocate fence memory for QMAN0\n");
3108         return -ENOMEM;
3109     }
3110 
3111     goya_qman0_set_security(hdev, true);
3112 
3113     cb = job->patched_cb;
3114 
3115     fence_pkt = cb->kernel_address +
3116             job->job_cb_size - sizeof(struct packet_msg_prot);
3117 
3118     tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3119             (1 << GOYA_PKT_CTL_EB_SHIFT) |
3120             (1 << GOYA_PKT_CTL_MB_SHIFT);
3121     fence_pkt->ctl = cpu_to_le32(tmp);
3122     fence_pkt->value = cpu_to_le32(GOYA_QMAN0_FENCE_VAL);
3123     fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3124 
3125     rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
3126                     job->job_cb_size, cb->bus_address);
3127     if (rc) {
3128         dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
3129         goto free_fence_ptr;
3130     }
3131 
3132     rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
3133                 (tmp == GOYA_QMAN0_FENCE_VAL), 1000,
3134                 timeout, true);
3135 
3136     hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);
3137 
3138     if (rc == -ETIMEDOUT) {
3139         dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
3140         goto free_fence_ptr;
3141     }
3142 
3143 free_fence_ptr:
3144     hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
3145 
3146     goya_qman0_set_security(hdev, false);
3147 
3148     return rc;
3149 }
3150 
3151 int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
3152                 u32 timeout, u64 *result)
3153 {
3154     struct goya_device *goya = hdev->asic_specific;
3155 
3156     if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
3157         if (result)
3158             *result = 0;
3159         return 0;
3160     }
3161 
3162     if (!timeout)
3163         timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC;
3164 
3165     return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
3166                     timeout, result);
3167 }
3168 
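/*
 * Per-queue self-test: allocate a zeroed fence buffer, submit a single
 * MSG_PROT packet that writes GOYA_QMAN0_FENCE_VAL to it, and poll the
 * buffer. A queue that never delivers the value is reported as broken
 * and the test returns -EIO.
 */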
3169 int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3170 {
3171     struct packet_msg_prot *fence_pkt;
3172     dma_addr_t pkt_dma_addr;
3173     u32 fence_val, tmp;
3174     dma_addr_t fence_dma_addr;
3175     u32 *fence_ptr;
3176     int rc;
3177 
3178     fence_val = GOYA_QMAN0_FENCE_VAL;
3179 
3180     fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
3181     if (!fence_ptr) {
3182         dev_err(hdev->dev,
3183             "Failed to allocate memory for H/W queue %d testing\n",
3184             hw_queue_id);
3185         return -ENOMEM;
3186     }
3187 
3188     *fence_ptr = 0;
3189 
3190     fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
3191                         &pkt_dma_addr);
3192     if (!fence_pkt) {
3193         dev_err(hdev->dev,
3194             "Failed to allocate packet for H/W queue %d testing\n",
3195             hw_queue_id);
3196         rc = -ENOMEM;
3197         goto free_fence_ptr;
3198     }
3199 
3200     tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3201             (1 << GOYA_PKT_CTL_EB_SHIFT) |
3202             (1 << GOYA_PKT_CTL_MB_SHIFT);
3203     fence_pkt->ctl = cpu_to_le32(tmp);
3204     fence_pkt->value = cpu_to_le32(fence_val);
3205     fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3206 
3207     rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3208                     sizeof(struct packet_msg_prot),
3209                     pkt_dma_addr);
3210     if (rc) {
3211         dev_err(hdev->dev,
3212             "Failed to send fence packet to H/W queue %d\n",
3213             hw_queue_id);
3214         goto free_pkt;
3215     }
3216 
3217     rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3218                     1000, GOYA_TEST_QUEUE_WAIT_USEC, true);
3219 
3220     hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3221 
3222     if (rc == -ETIMEDOUT) {
3223         dev_err(hdev->dev,
3224             "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3225             hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3226         rc = -EIO;
3227     }
3228 
3229 free_pkt:
3230     hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
3231 free_fence_ptr:
3232     hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
3233     return rc;
3234 }
3235 
3236 int goya_test_cpu_queue(struct hl_device *hdev)
3237 {
3238     struct goya_device *goya = hdev->asic_specific;
3239 
3240     /*
3241      * check capability here as send_cpu_message() won't update the result
3242      * value if no capability
3243      */
3244     if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
3245         return 0;
3246 
3247     return hl_fw_test_cpu_queue(hdev);
3248 }
3249 
3250 int goya_test_queues(struct hl_device *hdev)
3251 {
3252     int i, rc, ret_val = 0;
3253 
3254     for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
3255         rc = goya_test_queue(hdev, i);
3256         if (rc)
3257             ret_val = -EINVAL;
3258     }
3259 
3260     return ret_val;
3261 }
3262 
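/*
 * The device sees host memory through a window that starts at
 * HOST_PHYS_BASE, so every DMA handle handed to the H/W is biased by
 * that base on allocation and un-biased again on free. A minimal
 * sketch of the round trip:
 *
 *   ptr = goya_dma_pool_zalloc(hdev, 0x100, GFP_KERNEL, &dma);
 *   // dma == dma_pool address + HOST_PHYS_BASE
 *   goya_dma_pool_free(hdev, ptr, dma); // subtracts HOST_PHYS_BASE
 */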
3263 static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3264                     gfp_t mem_flags, dma_addr_t *dma_handle)
3265 {
3266     void *kernel_addr;
3267 
3268     if (size > GOYA_DMA_POOL_BLK_SIZE)
3269         return NULL;
3270 
3271     kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3272 
3273     /* Shift to the device's base physical address of host memory */
3274     if (kernel_addr)
3275         *dma_handle += HOST_PHYS_BASE;
3276 
3277     return kernel_addr;
3278 }
3279 
3280 static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
3281                 dma_addr_t dma_addr)
3282 {
3283     /* Cancel the device's base physical address of host memory */
3284     dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3285 
3286     dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3287 }
3288 
3289 void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
3290                     dma_addr_t *dma_handle)
3291 {
3292     void *vaddr;
3293 
3294     vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3295     *dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
3296             VA_CPU_ACCESSIBLE_MEM_ADDR;
3297 
3298     return vaddr;
3299 }
3300 
3301 void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
3302                     void *vaddr)
3303 {
3304     hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3305 }
3306 
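/*
 * Compute how many bytes of LIN_DMA packets the patched CB will need
 * for a scatter-gather table. DMA-contiguous entries are merged as
 * long as the combined length stays within DMA_MAX_TRANSFER_SIZE, and
 * each resulting run costs one packet_lin_dma. For example, two 4KB
 * entries at 0x1000 and 0x2000 collapse into a single 8KB descriptor.
 */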
3307 u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
3308 {
3309     struct scatterlist *sg, *sg_next_iter;
3310     u32 count, dma_desc_cnt;
3311     u64 len, len_next;
3312     dma_addr_t addr, addr_next;
3313 
3314     dma_desc_cnt = 0;
3315 
3316     for_each_sgtable_dma_sg(sgt, sg, count) {
3317         len = sg_dma_len(sg);
3318         addr = sg_dma_address(sg);
3319 
3320         if (len == 0)
3321             break;
3322 
3323         while ((count + 1) < sgt->nents) {
3324             sg_next_iter = sg_next(sg);
3325             len_next = sg_dma_len(sg_next_iter);
3326             addr_next = sg_dma_address(sg_next_iter);
3327 
3328             if (len_next == 0)
3329                 break;
3330 
3331             if ((addr + len == addr_next) &&
3332                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3333                 len += len_next;
3334                 count++;
3335                 sg = sg_next_iter;
3336             } else {
3337                 break;
3338             }
3339         }
3340 
3341         dma_desc_cnt++;
3342     }
3343 
3344     return dma_desc_cnt * sizeof(struct packet_lin_dma);
3345 }
3346 
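/*
 * Pin the host buffer referenced by a user LIN_DMA packet before the
 * CS runs. If the exact range is already on the job's userptr list it
 * is reused; otherwise it is pinned, DMA-mapped and added to that list
 * so it can be released when the job completes. Either way the patched
 * CB size grows by the descriptor-list size computed above.
 */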
3347 static int goya_pin_memory_before_cs(struct hl_device *hdev,
3348                 struct hl_cs_parser *parser,
3349                 struct packet_lin_dma *user_dma_pkt,
3350                 u64 addr, enum dma_data_direction dir)
3351 {
3352     struct hl_userptr *userptr;
3353     int rc;
3354 
3355     if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3356             parser->job_userptr_list, &userptr))
3357         goto already_pinned;
3358 
3359     userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
3360     if (!userptr)
3361         return -ENOMEM;
3362 
3363     rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3364                 userptr);
3365     if (rc)
3366         goto free_userptr;
3367 
3368     list_add_tail(&userptr->job_node, parser->job_userptr_list);
3369 
3370     rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
3371     if (rc) {
3372         dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3373         goto unpin_memory;
3374     }
3375 
3376     userptr->dma_mapped = true;
3377     userptr->dir = dir;
3378 
3379 already_pinned:
3380     parser->patched_cb_size +=
3381             goya_get_dma_desc_list_size(hdev, userptr->sgt);
3382 
3383     return 0;
3384 
3385 unpin_memory:
3386     list_del(&userptr->job_node);
3387     hl_unpin_host_memory(hdev, userptr);
3388 free_userptr:
3389     kfree(userptr);
3390     return rc;
3391 }
3392 
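/*
 * Validate a host<->device LIN_DMA packet in the non-MMU path: the
 * device-side address must fall inside the user part of SRAM or DRAM,
 * reads from host memory are only allowed on DMA queues 0/1 (mirroring
 * the HW-23 restriction documented in the MMU path below), and memset
 * transfers skip host pinning since there is no real host buffer
 * behind them.
 */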
3393 static int goya_validate_dma_pkt_host(struct hl_device *hdev,
3394                 struct hl_cs_parser *parser,
3395                 struct packet_lin_dma *user_dma_pkt)
3396 {
3397     u64 device_memory_addr, addr;
3398     enum dma_data_direction dir;
3399     enum hl_goya_dma_direction user_dir;
3400     bool sram_addr = true;
3401     bool skip_host_mem_pin = false;
3402     bool user_memset;
3403     u32 ctl;
3404     int rc = 0;
3405 
3406     ctl = le32_to_cpu(user_dma_pkt->ctl);
3407 
3408     user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3409             GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3410 
3411     user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3412             GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3413 
3414     switch (user_dir) {
3415     case HL_DMA_HOST_TO_DRAM:
3416         dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
3417         dir = DMA_TO_DEVICE;
3418         sram_addr = false;
3419         addr = le64_to_cpu(user_dma_pkt->src_addr);
3420         device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3421         if (user_memset)
3422             skip_host_mem_pin = true;
3423         break;
3424 
3425     case HL_DMA_DRAM_TO_HOST:
3426         dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
3427         dir = DMA_FROM_DEVICE;
3428         sram_addr = false;
3429         addr = le64_to_cpu(user_dma_pkt->dst_addr);
3430         device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3431         break;
3432 
3433     case HL_DMA_HOST_TO_SRAM:
3434         dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
3435         dir = DMA_TO_DEVICE;
3436         addr = le64_to_cpu(user_dma_pkt->src_addr);
3437         device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3438         if (user_memset)
3439             skip_host_mem_pin = true;
3440         break;
3441 
3442     case HL_DMA_SRAM_TO_HOST:
3443         dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
3444         dir = DMA_FROM_DEVICE;
3445         addr = le64_to_cpu(user_dma_pkt->dst_addr);
3446         device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3447         break;
3448     default:
3449         dev_err(hdev->dev, "DMA direction %d is unsupported/undefined\n", user_dir);
3450         return -EFAULT;
3451     }
3452 
3453     if (sram_addr) {
3454         if (!hl_mem_area_inside_range(device_memory_addr,
3455                 le32_to_cpu(user_dma_pkt->tsize),
3456                 hdev->asic_prop.sram_user_base_address,
3457                 hdev->asic_prop.sram_end_address)) {
3458 
3459             dev_err(hdev->dev,
3460                 "SRAM address 0x%llx + 0x%x is invalid\n",
3461                 device_memory_addr,
3462                 user_dma_pkt->tsize);
3463             return -EFAULT;
3464         }
3465     } else {
3466         if (!hl_mem_area_inside_range(device_memory_addr,
3467                 le32_to_cpu(user_dma_pkt->tsize),
3468                 hdev->asic_prop.dram_user_base_address,
3469                 hdev->asic_prop.dram_end_address)) {
3470 
3471             dev_err(hdev->dev,
3472                 "DRAM address 0x%llx + 0x%x is invalid\n",
3473                 device_memory_addr,
3474                 user_dma_pkt->tsize);
3475             return -EFAULT;
3476         }
3477     }
3478 
3479     if (skip_host_mem_pin)
3480         parser->patched_cb_size += sizeof(*user_dma_pkt);
3481     else {
3482         if ((dir == DMA_TO_DEVICE) &&
3483                 (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
3484             dev_err(hdev->dev,
3485                 "Can't DMA from host on queue other then 1\n");
3486             return -EFAULT;
3487         }
3488 
3489         rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3490                         addr, dir);
3491     }
3492 
3493     return rc;
3494 }
3495 
3496 static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
3497                 struct hl_cs_parser *parser,
3498                 struct packet_lin_dma *user_dma_pkt)
3499 {
3500     u64 sram_memory_addr, dram_memory_addr;
3501     enum hl_goya_dma_direction user_dir;
3502     u32 ctl;
3503 
3504     ctl = le32_to_cpu(user_dma_pkt->ctl);
3505     user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3506             GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3507 
3508     if (user_dir == HL_DMA_DRAM_TO_SRAM) {
3509         dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
3510         dram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3511         sram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3512     } else {
3513         dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
3514         sram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3515         dram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3516     }
3517 
3518     if (!hl_mem_area_inside_range(sram_memory_addr,
3519                 le32_to_cpu(user_dma_pkt->tsize),
3520                 hdev->asic_prop.sram_user_base_address,
3521                 hdev->asic_prop.sram_end_address)) {
3522         dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
3523             sram_memory_addr, user_dma_pkt->tsize);
3524         return -EFAULT;
3525     }
3526 
3527     if (!hl_mem_area_inside_range(dram_memory_addr,
3528                 le32_to_cpu(user_dma_pkt->tsize),
3529                 hdev->asic_prop.dram_user_base_address,
3530                 hdev->asic_prop.dram_end_address)) {
3531         dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
3532             dram_memory_addr, user_dma_pkt->tsize);
3533         return -EFAULT;
3534     }
3535 
3536     parser->patched_cb_size += sizeof(*user_dma_pkt);
3537 
3538     return 0;
3539 }
3540 
3541 static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3542                 struct hl_cs_parser *parser,
3543                 struct packet_lin_dma *user_dma_pkt)
3544 {
3545     enum hl_goya_dma_direction user_dir;
3546     u32 ctl;
3547     int rc;
3548 
3549     dev_dbg(hdev->dev, "DMA packet details:\n");
3550     dev_dbg(hdev->dev, "source == 0x%llx\n",
3551         le64_to_cpu(user_dma_pkt->src_addr));
3552     dev_dbg(hdev->dev, "destination == 0x%llx\n",
3553         le64_to_cpu(user_dma_pkt->dst_addr));
3554     dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3555 
3556     ctl = le32_to_cpu(user_dma_pkt->ctl);
3557     user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3558             GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3559 
3560     /*
3561      * Special handling for DMA with size 0. The H/W has a bug where
3562      * this can cause the QMAN DMA to get stuck, so block it here.
3563      */
3564     if (user_dma_pkt->tsize == 0) {
3565         dev_err(hdev->dev,
3566             "Got DMA with size 0, might reset the device\n");
3567         return -EINVAL;
3568     }
3569 
3570     if ((user_dir == HL_DMA_DRAM_TO_SRAM) || (user_dir == HL_DMA_SRAM_TO_DRAM))
3571         rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
3572     else
3573         rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);
3574 
3575     return rc;
3576 }
3577 
3578 static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
3579                 struct hl_cs_parser *parser,
3580                 struct packet_lin_dma *user_dma_pkt)
3581 {
3582     dev_dbg(hdev->dev, "DMA packet details:\n");
3583     dev_dbg(hdev->dev, "source == 0x%llx\n",
3584         le64_to_cpu(user_dma_pkt->src_addr));
3585     dev_dbg(hdev->dev, "destination == 0x%llx\n",
3586         le64_to_cpu(user_dma_pkt->dst_addr));
3587     dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3588 
3589     /*
3590      * WA for HW-23.
3591      * We can't allow user to read from Host using QMANs other than 1.
3592      * PMMU and HPMMU addresses are equal, check only one of them.
3593      */
3594     if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
3595         hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
3596                 le32_to_cpu(user_dma_pkt->tsize),
3597                 hdev->asic_prop.pmmu.start_addr,
3598                 hdev->asic_prop.pmmu.end_addr)) {
3599         dev_err(hdev->dev,
3600             "Can't DMA from host on queue other then 1\n");
3601         return -EFAULT;
3602     }
3603 
3604     if (user_dma_pkt->tsize == 0) {
3605         dev_err(hdev->dev,
3606             "Got DMA with size 0, might reset the device\n");
3607         return -EINVAL;
3608     }
3609 
3610     parser->patched_cb_size += sizeof(*user_dma_pkt);
3611 
3612     return 0;
3613 }
3614 
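/*
 * User WREG32 packets may only target the DMA channel 0 WR_COMP
 * low-address register. Without MMU the written value must also land
 * inside the sync manager's SOB range; with MMU the DMA channels are
 * not secured, so the value check is skipped (see the comment inside).
 */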
3615 static int goya_validate_wreg32(struct hl_device *hdev,
3616                 struct hl_cs_parser *parser,
3617                 struct packet_wreg32 *wreg_pkt)
3618 {
3619     struct goya_device *goya = hdev->asic_specific;
3620     u32 sob_start_addr, sob_end_addr;
3621     u16 reg_offset;
3622 
3623     reg_offset = le32_to_cpu(wreg_pkt->ctl) &
3624             GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;
3625 
3626     dev_dbg(hdev->dev, "WREG32 packet details:\n");
3627     dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
3628     dev_dbg(hdev->dev, "value      == 0x%x\n",
3629         le32_to_cpu(wreg_pkt->value));
3630 
3631     if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) {
3632         dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
3633             reg_offset);
3634         return -EPERM;
3635     }
3636 
3637     /*
3638      * With MMU, DMA channels are not secured, so it doesn't matter where
3639      * the WR COMP will be written to because it will go out with
3640      * non-secured property
3641      */
3642     if (goya->hw_cap_initialized & HW_CAP_MMU)
3643         return 0;
3644 
3645     sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
3646     sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);
3647 
3648     if ((le32_to_cpu(wreg_pkt->value) < sob_start_addr) ||
3649             (le32_to_cpu(wreg_pkt->value) > sob_end_addr)) {
3650 
3651         dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
3652             wreg_pkt->value);
3653         return -EPERM;
3654     }
3655 
3656     return 0;
3657 }
3658 
3659 static int goya_validate_cb(struct hl_device *hdev,
3660             struct hl_cs_parser *parser, bool is_mmu)
3661 {
3662     u32 cb_parsed_length = 0;
3663     int rc = 0;
3664 
3665     parser->patched_cb_size = 0;
3666 
3667     /* user_cb_size is greater than 0, so the loop always executes */
3668     while (cb_parsed_length < parser->user_cb_size) {
3669         enum packet_id pkt_id;
3670         u16 pkt_size;
3671         struct goya_packet *user_pkt;
3672 
3673         user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
3674 
3675         pkt_id = (enum packet_id) (
3676                 (le64_to_cpu(user_pkt->header) &
3677                 PACKET_HEADER_PACKET_ID_MASK) >>
3678                     PACKET_HEADER_PACKET_ID_SHIFT);
3679 
3680         if (!validate_packet_id(pkt_id)) {
3681             dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3682             rc = -EINVAL;
3683             break;
3684         }
3685 
3686         pkt_size = goya_packet_sizes[pkt_id];
3687         cb_parsed_length += pkt_size;
3688         if (cb_parsed_length > parser->user_cb_size) {
3689             dev_err(hdev->dev,
3690                 "packet 0x%x is out of CB boundary\n", pkt_id);
3691             rc = -EINVAL;
3692             break;
3693         }
3694 
3695         switch (pkt_id) {
3696         case PACKET_WREG_32:
3697             /*
3698              * Although it is validated after copy in patch_cb(),
3699              * need to validate here as well because patch_cb() is
3700              * not called in MMU path while this function is called
3701              */
3702             rc = goya_validate_wreg32(hdev,
3703                 parser, (struct packet_wreg32 *) user_pkt);
3704             parser->patched_cb_size += pkt_size;
3705             break;
3706 
3707         case PACKET_WREG_BULK:
3708             dev_err(hdev->dev,
3709                 "User not allowed to use WREG_BULK\n");
3710             rc = -EPERM;
3711             break;
3712 
3713         case PACKET_MSG_PROT:
3714             dev_err(hdev->dev,
3715                 "User not allowed to use MSG_PROT\n");
3716             rc = -EPERM;
3717             break;
3718 
3719         case PACKET_CP_DMA:
3720             dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3721             rc = -EPERM;
3722             break;
3723 
3724         case PACKET_STOP:
3725             dev_err(hdev->dev, "User not allowed to use STOP\n");
3726             rc = -EPERM;
3727             break;
3728 
3729         case PACKET_LIN_DMA:
3730             if (is_mmu)
3731                 rc = goya_validate_dma_pkt_mmu(hdev, parser,
3732                     (struct packet_lin_dma *) user_pkt);
3733             else
3734                 rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
3735                     (struct packet_lin_dma *) user_pkt);
3736             break;
3737 
3738         case PACKET_MSG_LONG:
3739         case PACKET_MSG_SHORT:
3740         case PACKET_FENCE:
3741         case PACKET_NOP:
3742             parser->patched_cb_size += pkt_size;
3743             break;
3744 
3745         default:
3746             dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3747                 pkt_id);
3748             rc = -EINVAL;
3749             break;
3750         }
3751 
3752         if (rc)
3753             break;
3754     }
3755 
3756     /*
3757      * The new CB should have space at the end for two MSG_PROT packets:
3758      * 1. A packet that will act as a completion packet
3759      * 2. A packet that will generate an MSI-X interrupt
3760      */
3761     parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3762 
3763     return rc;
3764 }
3765 
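/*
 * Rewrite one user LIN_DMA packet into the patched CB. DRAM<->SRAM and
 * zero-size packets are copied through untouched; host transfers are
 * expanded into one packet per coalesced SG run. Only the first
 * expanded packet keeps the EB bit, and the user's RDCOMP/WRCOMP bits
 * are restored on the last packet only, so completion fires once for
 * the whole transfer.
 */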
3766 static int goya_patch_dma_packet(struct hl_device *hdev,
3767                 struct hl_cs_parser *parser,
3768                 struct packet_lin_dma *user_dma_pkt,
3769                 struct packet_lin_dma *new_dma_pkt,
3770                 u32 *new_dma_pkt_size)
3771 {
3772     struct hl_userptr *userptr;
3773     struct scatterlist *sg, *sg_next_iter;
3774     u32 count, dma_desc_cnt;
3775     u64 len, len_next;
3776     dma_addr_t dma_addr, dma_addr_next;
3777     enum hl_goya_dma_direction user_dir;
3778     u64 device_memory_addr, addr;
3779     enum dma_data_direction dir;
3780     struct sg_table *sgt;
3781     bool skip_host_mem_pin = false;
3782     bool user_memset;
3783     u32 user_rdcomp_mask, user_wrcomp_mask, ctl;
3784 
3785     ctl = le32_to_cpu(user_dma_pkt->ctl);
3786 
3787     user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3788             GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3789 
3790     user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3791             GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3792 
3793     if ((user_dir == HL_DMA_DRAM_TO_SRAM) || (user_dir == HL_DMA_SRAM_TO_DRAM) ||
3794             (user_dma_pkt->tsize == 0)) {
3795         memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
3796         *new_dma_pkt_size = sizeof(*new_dma_pkt);
3797         return 0;
3798     }
3799 
3800     if ((user_dir == HL_DMA_HOST_TO_DRAM) || (user_dir == HL_DMA_HOST_TO_SRAM)) {
3801         addr = le64_to_cpu(user_dma_pkt->src_addr);
3802         device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3803         dir = DMA_TO_DEVICE;
3804         if (user_memset)
3805             skip_host_mem_pin = true;
3806     } else {
3807         addr = le64_to_cpu(user_dma_pkt->dst_addr);
3808         device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3809         dir = DMA_FROM_DEVICE;
3810     }
3811 
3812     if ((!skip_host_mem_pin) &&
3813         (hl_userptr_is_pinned(hdev, addr,
3814             le32_to_cpu(user_dma_pkt->tsize),
3815             parser->job_userptr_list, &userptr) == false)) {
3816         dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3817                 addr, user_dma_pkt->tsize);
3818         return -EFAULT;
3819     }
3820 
3821     if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3822         memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3823         *new_dma_pkt_size = sizeof(*user_dma_pkt);
3824         return 0;
3825     }
3826 
3827     user_rdcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK;
3828 
3829     user_wrcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK;
3830 
3831     sgt = userptr->sgt;
3832     dma_desc_cnt = 0;
3833 
3834     for_each_sgtable_dma_sg(sgt, sg, count) {
3835         len = sg_dma_len(sg);
3836         dma_addr = sg_dma_address(sg);
3837 
3838         if (len == 0)
3839             break;
3840 
3841         while ((count + 1) < sgt->nents) {
3842             sg_next_iter = sg_next(sg);
3843             len_next = sg_dma_len(sg_next_iter);
3844             dma_addr_next = sg_dma_address(sg_next_iter);
3845 
3846             if (len_next == 0)
3847                 break;
3848 
3849             if ((dma_addr + len == dma_addr_next) &&
3850                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3851                 len += len_next;
3852                 count++;
3853                 sg = sg_next_iter;
3854             } else {
3855                 break;
3856             }
3857         }
3858 
3859         ctl = le32_to_cpu(user_dma_pkt->ctl);
3860         if (likely(dma_desc_cnt))
3861             ctl &= ~GOYA_PKT_CTL_EB_MASK;
3862         ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
3863                 GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
3864         new_dma_pkt->ctl = cpu_to_le32(ctl);
3865         new_dma_pkt->tsize = cpu_to_le32((u32) len);
3866 
3867         if (dir == DMA_TO_DEVICE) {
3868             new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3869             new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3870         } else {
3871             new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3872             new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3873         }
3874 
3875         if (!user_memset)
3876             device_memory_addr += len;
3877         dma_desc_cnt++;
3878         new_dma_pkt++;
3879     }
3880 
3881     if (!dma_desc_cnt) {
3882         dev_err(hdev->dev,
3883             "Error of 0 SG entries when patching DMA packet\n");
3884         return -EFAULT;
3885     }
3886 
3887     /* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
3888     new_dma_pkt--;
3889     new_dma_pkt->ctl |= cpu_to_le32(user_rdcomp_mask | user_wrcomp_mask);
3890 
3891     *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
3892 
3893     return 0;
3894 }
3895 
3896 static int goya_patch_cb(struct hl_device *hdev,
3897                 struct hl_cs_parser *parser)
3898 {
3899     u32 cb_parsed_length = 0;
3900     u32 cb_patched_cur_length = 0;
3901     int rc = 0;
3902 
3903     /* user_cb_size is greater than 0, so the loop always executes */
3904     while (cb_parsed_length < parser->user_cb_size) {
3905         enum packet_id pkt_id;
3906         u16 pkt_size;
3907         u32 new_pkt_size = 0;
3908         struct goya_packet *user_pkt, *kernel_pkt;
3909 
3910         user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
3911         kernel_pkt = parser->patched_cb->kernel_address +
3912                     cb_patched_cur_length;
3913 
3914         pkt_id = (enum packet_id) (
3915                 (le64_to_cpu(user_pkt->header) &
3916                 PACKET_HEADER_PACKET_ID_MASK) >>
3917                     PACKET_HEADER_PACKET_ID_SHIFT);
3918 
3919         if (!validate_packet_id(pkt_id)) {
3920             dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3921             rc = -EINVAL;
3922             break;
3923         }
3924 
3925         pkt_size = goya_packet_sizes[pkt_id];
3926         cb_parsed_length += pkt_size;
3927         if (cb_parsed_length > parser->user_cb_size) {
3928             dev_err(hdev->dev,
3929                 "packet 0x%x is out of CB boundary\n", pkt_id);
3930             rc = -EINVAL;
3931             break;
3932         }
3933 
3934         switch (pkt_id) {
3935         case PACKET_LIN_DMA:
3936             rc = goya_patch_dma_packet(hdev, parser,
3937                     (struct packet_lin_dma *) user_pkt,
3938                     (struct packet_lin_dma *) kernel_pkt,
3939                     &new_pkt_size);
3940             cb_patched_cur_length += new_pkt_size;
3941             break;
3942 
3943         case PACKET_WREG_32:
3944             memcpy(kernel_pkt, user_pkt, pkt_size);
3945             cb_patched_cur_length += pkt_size;
3946             rc = goya_validate_wreg32(hdev, parser,
3947                     (struct packet_wreg32 *) kernel_pkt);
3948             break;
3949 
3950         case PACKET_WREG_BULK:
3951             dev_err(hdev->dev,
3952                 "User not allowed to use WREG_BULK\n");
3953             rc = -EPERM;
3954             break;
3955 
3956         case PACKET_MSG_PROT:
3957             dev_err(hdev->dev,
3958                 "User not allowed to use MSG_PROT\n");
3959             rc = -EPERM;
3960             break;
3961 
3962         case PACKET_CP_DMA:
3963             dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3964             rc = -EPERM;
3965             break;
3966 
3967         case PACKET_STOP:
3968             dev_err(hdev->dev, "User not allowed to use STOP\n");
3969             rc = -EPERM;
3970             break;
3971 
3972         case PACKET_MSG_LONG:
3973         case PACKET_MSG_SHORT:
3974         case PACKET_FENCE:
3975         case PACKET_NOP:
3976             memcpy(kernel_pkt, user_pkt, pkt_size);
3977             cb_patched_cur_length += pkt_size;
3978             break;
3979 
3980         default:
3981             dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3982                 pkt_id);
3983             rc = -EINVAL;
3984             break;
3985         }
3986 
3987         if (rc)
3988             break;
3989     }
3990 
3991     return rc;
3992 }
3993 
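/*
 * MMU parsing path: the user CB is copied verbatim into a kernel-owned
 * CB (with room for the two trailing MSG_PROT packets) and validated
 * in place, with no per-packet patching. The size goya_validate_cb()
 * computes must therefore match the preallocated size exactly; a
 * mismatch indicates the CB changed between sizing and validation, and
 * the CS is rejected.
 */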
3994 static int goya_parse_cb_mmu(struct hl_device *hdev,
3995         struct hl_cs_parser *parser)
3996 {
3997     u64 handle;
3998     u32 patched_cb_size;
3999     struct hl_cb *user_cb;
4000     int rc;
4001 
4002     /*
4003      * The new CB should have space at the end for two MSG_PROT packets:
4004      * 1. A packet that will act as a completion packet
4005      * 2. A packet that will generate an MSI-X interrupt
4006      */
4007     parser->patched_cb_size = parser->user_cb_size +
4008             sizeof(struct packet_msg_prot) * 2;
4009 
4010     rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
4011                 parser->patched_cb_size, false, false,
4012                 &handle);
4013 
4014     if (rc) {
4015         dev_err(hdev->dev,
4016             "Failed to allocate patched CB for DMA CS %d\n",
4017             rc);
4018         return rc;
4019     }
4020 
4021     parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
4022     /* hl_cb_get should never fail here */
4023     if (!parser->patched_cb) {
4024         dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
4025         rc = -EFAULT;
4026         goto out;
4027     }
4028 
4029     /*
4030      * The check that parser->user_cb_size <= parser->user_cb->size was done
4031      * in validate_queue_index().
4032      */
4033     memcpy(parser->patched_cb->kernel_address,
4034         parser->user_cb->kernel_address,
4035         parser->user_cb_size);
4036 
4037     patched_cb_size = parser->patched_cb_size;
4038 
4039     /* validate patched CB instead of user CB */
4040     user_cb = parser->user_cb;
4041     parser->user_cb = parser->patched_cb;
4042     rc = goya_validate_cb(hdev, parser, true);
4043     parser->user_cb = user_cb;
4044 
4045     if (rc) {
4046         hl_cb_put(parser->patched_cb);
4047         goto out;
4048     }
4049 
4050     if (patched_cb_size != parser->patched_cb_size) {
4051         dev_err(hdev->dev, "user CB size mismatch\n");
4052         hl_cb_put(parser->patched_cb);
4053         rc = -EINVAL;
4054         goto out;
4055     }
4056 
4057 out:
4058     /*
4059      * Always destroy the CB here because we still hold a reference
4060      * to it from the earlier cb_get. After the job completes,
4061      * cb_put will release it, but here we want to remove it from
4062      * the IDR.
4063      */
4064     hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
4065 
4066     return rc;
4067 }
4068 
4069 static int goya_parse_cb_no_mmu(struct hl_device *hdev,
4070                 struct hl_cs_parser *parser)
4071 {
4072     u64 handle;
4073     int rc;
4074 
4075     rc = goya_validate_cb(hdev, parser, false);
4076 
4077     if (rc)
4078         goto free_userptr;
4079 
4080     rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
4081                 parser->patched_cb_size, false, false,
4082                 &handle);
4083     if (rc) {
4084         dev_err(hdev->dev,
4085             "Failed to allocate patched CB for DMA CS %d\n", rc);
4086         goto free_userptr;
4087     }
4088 
4089     parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
4090     /* hl_cb_get should never fail here */
4091     if (!parser->patched_cb) {
4092         dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
4093         rc = -EFAULT;
4094         goto out;
4095     }
4096 
4097     rc = goya_patch_cb(hdev, parser);
4098 
4099     if (rc)
4100         hl_cb_put(parser->patched_cb);
4101 
4102 out:
4103     /*
4104      * Always destroy the CB here because we still hold a reference
4105      * to it from the earlier cb_get. After the job completes,
4106      * cb_put will release it, but here we want to remove it from
4107      * the IDR.
4108      */
4109     hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
4110 
4111 free_userptr:
4112     if (rc)
4113         hl_userptr_delete_list(hdev, parser->job_userptr_list);
4114     return rc;
4115 }
4116 
4117 static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
4118                     struct hl_cs_parser *parser)
4119 {
4120     struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4121     struct goya_device *goya = hdev->asic_specific;
4122 
4123     if (goya->hw_cap_initialized & HW_CAP_MMU)
4124         return 0;
4125 
4126     /* For internal queue jobs, just check if CB address is valid */
4127     if (hl_mem_area_inside_range(
4128             (u64) (uintptr_t) parser->user_cb,
4129             parser->user_cb_size,
4130             asic_prop->sram_user_base_address,
4131             asic_prop->sram_end_address))
4132         return 0;
4133 
4134     if (hl_mem_area_inside_range(
4135             (u64) (uintptr_t) parser->user_cb,
4136             parser->user_cb_size,
4137             asic_prop->dram_user_base_address,
4138             asic_prop->dram_end_address))
4139         return 0;
4140 
4141     dev_err(hdev->dev,
4142         "Internal CB address 0x%px + 0x%x is not in SRAM nor in DRAM\n",
4143         parser->user_cb, parser->user_cb_size);
4144 
4145     return -EFAULT;
4146 }
4147 
4148 int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4149 {
4150     struct goya_device *goya = hdev->asic_specific;
4151 
4152     if (parser->queue_type == QUEUE_TYPE_INT)
4153         return goya_parse_cb_no_ext_queue(hdev, parser);
4154 
4155     if (goya->hw_cap_initialized & HW_CAP_MMU)
4156         return goya_parse_cb_mmu(hdev, parser);
4157     else
4158         return goya_parse_cb_no_mmu(hdev, parser);
4159 }
4160 
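/*
 * Fill the two reserved MSG_PROT slots at the end of a patched CB: the
 * first writes cq_val to the completion queue address, the second
 * rings the MSI-X doorbell by writing the vector number, masked to
 * 11 bits, into the PCIe DBI doorbell register.
 */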
4161 void goya_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
4162                 u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
4163                 u32 msix_vec, bool eb)
4164 {
4165     struct packet_msg_prot *cq_pkt;
4166     u32 tmp;
4167 
4168     cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
4169 
4170     tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
4171             (1 << GOYA_PKT_CTL_EB_SHIFT) |
4172             (1 << GOYA_PKT_CTL_MB_SHIFT);
4173     cq_pkt->ctl = cpu_to_le32(tmp);
4174     cq_pkt->value = cpu_to_le32(cq_val);
4175     cq_pkt->addr = cpu_to_le64(cq_addr);
4176 
4177     cq_pkt++;
4178 
4179     tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
4180             (1 << GOYA_PKT_CTL_MB_SHIFT);
4181     cq_pkt->ctl = cpu_to_le32(tmp);
4182     cq_pkt->value = cpu_to_le32(msix_vec & 0x7FF);
4183     cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF);
4184 }
4185 
4186 void goya_update_eq_ci(struct hl_device *hdev, u32 val)
4187 {
4188     WREG32(mmCPU_EQ_CI, val);
4189 }
4190 
4191 void goya_restore_phase_topology(struct hl_device *hdev)
4192 {
4193 
4194 }
4195 
4196 static void goya_clear_sm_regs(struct hl_device *hdev)
4197 {
4198     int i, num_of_sob_in_longs, num_of_mon_in_longs;
4199 
4200     num_of_sob_in_longs =
4201         ((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4);
4202 
4203     num_of_mon_in_longs =
4204         ((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4);
4205 
4206     for (i = 0 ; i < num_of_sob_in_longs ; i += 4)
4207         WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0);
4208 
4209     for (i = 0 ; i < num_of_mon_in_longs ; i += 4)
4210         WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0);
4211 
4212     /* Flush all WREG to prevent race */
4213     i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
4214 }
4215 
4216 static int goya_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
4217 {
4218     dev_err(hdev->dev, "Reading via DMA is unimplemented yet\n");
4219     return -EPERM;
4220 }
4221 
4222 static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
4223 {
4224     struct goya_device *goya = hdev->asic_specific;
4225 
4226     if (hdev->reset_info.hard_reset_pending)
4227         return U64_MAX;
4228 
4229     return readq(hdev->pcie_bar[DDR_BAR_ID] +
4230             (addr - goya->ddr_bar_cur_addr));
4231 }
4232 
4233 static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4234 {
4235     struct goya_device *goya = hdev->asic_specific;
4236 
4237     if (hdev->reset_info.hard_reset_pending)
4238         return;
4239 
4240     writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
4241             (addr - goya->ddr_bar_cur_addr));
4242 }
4243 
4244 static const char *_goya_get_event_desc(u16 event_type)
4245 {
4246     switch (event_type) {
4247     case GOYA_ASYNC_EVENT_ID_PCIE_IF:
4248         return "PCIe_if";
4249     case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4250     case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4251     case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4252     case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4253     case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4254     case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4255     case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4256     case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4257         return "TPC%d_ecc";
4258     case GOYA_ASYNC_EVENT_ID_MME_ECC:
4259         return "MME_ecc";
4260     case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
4261         return "MME_ecc_ext";
4262     case GOYA_ASYNC_EVENT_ID_MMU_ECC:
4263         return "MMU_ecc";
4264     case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
4265         return "DMA_macro";
4266     case GOYA_ASYNC_EVENT_ID_DMA_ECC:
4267         return "DMA_ecc";
4268     case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
4269         return "CPU_if_ecc";
4270     case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
4271         return "PSOC_mem";
4272     case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
4273         return "PSOC_coresight";
4274     case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4275         return "SRAM%d";
4276     case GOYA_ASYNC_EVENT_ID_GIC500:
4277         return "GIC500";
4278     case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4279         return "PLL%d";
4280     case GOYA_ASYNC_EVENT_ID_AXI_ECC:
4281         return "AXI_ecc";
4282     case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
4283         return "L2_ram_ecc";
4284     case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
4285         return "PSOC_gpio_05_sw_reset";
4286     case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
4287         return "PSOC_gpio_10_vrhot_icrit";
4288     case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4289         return "PCIe_dec";
4290     case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4291     case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4292     case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4293     case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4294     case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4295     case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4296     case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4297     case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4298         return "TPC%d_dec";
4299     case GOYA_ASYNC_EVENT_ID_MME_WACS:
4300         return "MME_wacs";
4301     case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4302         return "MME_wacsd";
4303     case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4304         return "CPU_axi_splitter";
4305     case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4306         return "PSOC_axi_dec";
4307     case GOYA_ASYNC_EVENT_ID_PSOC:
4308         return "PSOC";
4309     case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4310     case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4311     case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4312     case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4313     case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4314     case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4315     case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4316     case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4317         return "TPC%d_krn_err";
4318     case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
4319         return "TPC%d_cq";
4320     case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4321         return "TPC%d_qm";
4322     case GOYA_ASYNC_EVENT_ID_MME_QM:
4323         return "MME_qm";
4324     case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4325         return "MME_cq";
4326     case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4327         return "DMA%d_qm";
4328     case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4329         return "DMA%d_ch";
4330     case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4331     case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4332     case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4333     case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4334     case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4335     case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4336     case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4337     case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4338         return "TPC%d_bmon_spmu";
4339     case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4340         return "DMA_bm_ch%d";
4341     case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
4342         return "POWER_ENV_S";
4343     case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
4344         return "POWER_ENV_E";
4345     case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
4346         return "THERMAL_ENV_S";
4347     case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
4348         return "THERMAL_ENV_E";
4349     case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
4350         return "QUEUE_OUT_OF_SYNC";
4351     default:
4352         return "N/A";
4353     }
4354 }
4355 
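/*
 * _goya_get_event_desc() above returns printf-style templates
 * ("TPC%d_ecc", "DMA%d_qm", ...); this wrapper derives the instance
 * index from the event ID before formatting. The divisors reflect how
 * the IDs appear to be spaced (3 apart per TPC for ECC/DEC, 10 apart
 * for KRN_ERR and BMON_SPMU), e.g.:
 *
 *   index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
 *
 * maps TPC3's ECC event to index 3.
 */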
4356 static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
4357 {
4358     u8 index;
4359 
4360     switch (event_type) {
4361     case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4362     case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4363     case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4364     case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4365     case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4366     case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4367     case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4368     case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4369         index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
4370         snprintf(desc, size, _goya_get_event_desc(event_type), index);
4371         break;
4372     case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4373         index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
4374         snprintf(desc, size, _goya_get_event_desc(event_type), index);
4375         break;
4376     case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4377         index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
4378         snprintf(desc, size, _goya_get_event_desc(event_type), index);
4379         break;
4380     case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4381     case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4382     case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4383     case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4384     case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4385     case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4386     case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4387     case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4388         index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_DEC) / 3;
4389         snprintf(desc, size, _goya_get_event_desc(event_type), index);
4390         break;
4391     case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4392     case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4393     case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4394     case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4395     case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4396     case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4397     case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4398     case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4399         index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR) / 10;
4400         snprintf(desc, size, _goya_get_event_desc(event_type), index);
4401         break;
4402     case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
4403         index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_CMDQ;
4404         snprintf(desc, size, _goya_get_event_desc(event_type), index);
4405         break;
4406     case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4407         index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_QM;
4408         snprintf(desc, size, _goya_get_event_desc(event_type), index);
4409         break;
4410     case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4411         index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM;
4412         snprintf(desc, size, _goya_get_event_desc(event_type), index);
4413         break;
4414     case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4415         index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
4416         snprintf(desc, size, _goya_get_event_desc(event_type), index);
4417         break;
4418     case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4419     case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4420     case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4421     case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4422     case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4423     case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4424     case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4425     case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4426         index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
4427         snprintf(desc, size, _goya_get_event_desc(event_type), index);
4428         break;
4429     case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4430         index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
4431         snprintf(desc, size, _goya_get_event_desc(event_type), index);
4432         break;
4433     case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
4434         snprintf(desc, size, _goya_get_event_desc(event_type));
4435         break;
4436     default:
4437         snprintf(desc, size, _goya_get_event_desc(event_type));
4438         break;
4439     }
4440 }
4441 
4442 static void goya_print_razwi_info(struct hl_device *hdev)
4443 {
4444     if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
4445         dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n");
4446         WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
4447     }
4448 
4449     if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
4450         dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n");
4451         WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
4452     }
4453 
4454     if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
4455         dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n");
4456         WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
4457     }
4458 
4459     if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
4460         dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n");
4461         WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
4462     }
4463 }
4464 
4465 static void goya_print_mmu_error_info(struct hl_device *hdev)
4466 {
4467     struct goya_device *goya = hdev->asic_specific;
4468     u64 addr;
4469     u32 val;
4470 
4471     if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4472         return;
4473 
4474     val = RREG32(mmMMU_PAGE_ERROR_CAPTURE);
4475     if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
4476         addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
4477         addr <<= 32;
4478         addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);
4479 
4480         dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
4481                     addr);
4482 
4483         WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
4484     }
4485 }
4486 
4487 static void goya_print_out_of_sync_info(struct hl_device *hdev,
4488                     struct cpucp_pkt_sync_err *sync_err)
4489 {
4490     struct hl_hw_queue *q = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
4491 
4492     dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
4493             sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
4494 }
4495 
4496 static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
4497                 bool razwi)
4498 {
4499     char desc[20] = "";
4500 
4501     goya_get_event_desc(event_type, desc, sizeof(desc));
4502     dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
4503         event_type, desc);
4504 
4505     if (razwi) {
4506         goya_print_razwi_info(hdev);
4507         goya_print_mmu_error_info(hdev);
4508     }
4509 }
4510 
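/*
 * Ask the device CPU to re-enable a whole array of RAZWI IRQs in one
 * CPU-CP packet. The payload is padded to an 8-byte boundary (required
 * for the CPU-CP copy) and every IRQ number is converted to
 * little-endian explicitly, since the device consumes the packet
 * regardless of host endianness.
 */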
4511 static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
4512         size_t irq_arr_size)
4513 {
4514     struct cpucp_unmask_irq_arr_packet *pkt;
4515     size_t total_pkt_size;
4516     u64 result;
4517     int rc;
4518     int irq_num_entries, irq_arr_index;
4519     __le32 *goya_irq_arr;
4520 
4521     total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
4522             irq_arr_size;
4523 
4524     /* data should be aligned to 8 bytes in order for CPU-CP to copy it */
4525     total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
4526 
4527     /* total_pkt_size is cast to u16 later on */
4528     if (total_pkt_size > USHRT_MAX) {
4529         dev_err(hdev->dev, "too many elements in IRQ array\n");
4530         return -EINVAL;
4531     }
4532 
4533     pkt = kzalloc(total_pkt_size, GFP_KERNEL);
4534     if (!pkt)
4535         return -ENOMEM;
4536 
4537     irq_num_entries = irq_arr_size / sizeof(irq_arr[0]);
4538     pkt->length = cpu_to_le32(irq_num_entries);
4539 
4540     /* We must perform any necessary endianness conversion on the irq
4541      * array being passed to the goya hardware
4542      */
4543     for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs;
4544             irq_arr_index < irq_num_entries ; irq_arr_index++)
4545         goya_irq_arr[irq_arr_index] =
4546                 cpu_to_le32(irq_arr[irq_arr_index]);
4547 
4548     pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
4549                         CPUCP_PKT_CTL_OPCODE_SHIFT);
4550 
4551     rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
4552                         total_pkt_size, 0, &result);
4553 
4554     if (rc)
4555         dev_err(hdev->dev, "failed to unmask IRQ array\n");
4556 
4557     kfree(pkt);
4558 
4559     return rc;
4560 }
4561 
4562 static int goya_non_hard_reset_late_init(struct hl_device *hdev)
4563 {
4564     /*
4565      * Unmask all IRQs since some could have been received
4566      * during the soft reset
4567      */
4568     return goya_unmask_irq_arr(hdev, goya_all_events,
4569                     sizeof(goya_all_events));
4570 }
4571 
4572 static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
4573 {
4574     struct cpucp_packet pkt;
4575     u64 result;
4576     int rc;
4577 
4578     memset(&pkt, 0, sizeof(pkt));
4579 
4580     pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
4581                 CPUCP_PKT_CTL_OPCODE_SHIFT);
4582     pkt.value = cpu_to_le64(event_type);
4583 
4584     rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
4585                         0, &result);
4586 
4587     if (rc)
4588         dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
4589 
4590     return rc;
4591 }
4592 
4593 static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
4594 {
4595     ktime_t zero_time = ktime_set(0, 0);
4596 
4597     mutex_lock(&hdev->clk_throttling.lock);
4598 
4599     switch (event_type) {
4600     case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
4601         hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
4602         hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
4603         hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
4604         hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
4605         dev_info_ratelimited(hdev->dev,
4606             "Clock throttling due to power consumption\n");
4607         break;
4608 
4609     case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
4610         hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
4611         hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
4612         dev_info_ratelimited(hdev->dev,
4613             "Power envelop is safe, back to optimal clock\n");
4614         break;
4615 
4616     case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
4617         hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
4618         hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
4619         hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
4620         hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
4621         dev_info_ratelimited(hdev->dev,
4622             "Clock throttling due to overheating\n");
4623         break;
4624 
4625     case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
4626         hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
4627         hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
4628         dev_info_ratelimited(hdev->dev,
4629             "Thermal envelop is safe, back to optimal clock\n");
4630         break;
4631 
4632     default:
4633         dev_err(hdev->dev, "Received invalid clock change event %d\n",
4634             event_type);
4635         break;
4636     }
4637 
4638     mutex_unlock(&hdev->clk_throttling.lock);
4639 }
4640 
4641 void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
4642 {
4643     u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
4644     u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
4645                 >> EQ_CTL_EVENT_TYPE_SHIFT);
4646     struct goya_device *goya = hdev->asic_specific;
4647 
4648     if (event_type >= GOYA_ASYNC_EVENT_ID_SIZE) {
4649         dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
4650                 event_type, GOYA_ASYNC_EVENT_ID_SIZE - 1);
4651         return;
4652     }
4653 
4654     goya->events_stat[event_type]++;
4655     goya->events_stat_aggregate[event_type]++;
4656 
4657     switch (event_type) {
4658     case GOYA_ASYNC_EVENT_ID_PCIE_IF:
4659     case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4660     case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4661     case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4662     case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4663     case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4664     case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4665     case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4666     case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4667     case GOYA_ASYNC_EVENT_ID_MME_ECC:
4668     case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
4669     case GOYA_ASYNC_EVENT_ID_MMU_ECC:
4670     case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
4671     case GOYA_ASYNC_EVENT_ID_DMA_ECC:
4672     case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
4673     case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
4674     case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
4675     case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4676     case GOYA_ASYNC_EVENT_ID_GIC500:
4677     case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4678     case GOYA_ASYNC_EVENT_ID_AXI_ECC:
4679     case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
4680         goya_print_irq_info(hdev, event_type, false);
4681         if (hdev->hard_reset_on_fw_events)
4682             hl_device_reset(hdev, (HL_DRV_RESET_HARD |
4683                         HL_DRV_RESET_FW_FATAL_ERR));
4684         break;
4685 
4686     case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
4687         goya_print_irq_info(hdev, event_type, false);
4688         if (hdev->hard_reset_on_fw_events)
4689             hl_device_reset(hdev, HL_DRV_RESET_HARD);
4690         break;
4691 
4692     case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4693     case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4694     case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4695     case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4696     case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4697     case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4698     case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4699     case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4700     case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4701     case GOYA_ASYNC_EVENT_ID_MME_WACS:
4702     case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4703     case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4704     case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4705     case GOYA_ASYNC_EVENT_ID_PSOC:
4706     case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4707     case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4708     case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4709     case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4710     case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4711     case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4712     case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4713     case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4714     case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4715     case GOYA_ASYNC_EVENT_ID_MME_QM:
4716     case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4717     case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4718     case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4719         goya_print_irq_info(hdev, event_type, true);
4720         goya_unmask_irq(hdev, event_type);
4721         break;
4722 
4723     case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
4724     case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4725     case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4726     case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4727     case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4728     case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4729     case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4730     case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4731     case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4732     case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4733         goya_print_irq_info(hdev, event_type, false);
4734         goya_unmask_irq(hdev, event_type);
4735         break;
4736 
4737     case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
4738     case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
4739     case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
4740     case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
4741         goya_print_clk_change_info(hdev, event_type);
4742         goya_unmask_irq(hdev, event_type);
4743         break;
4744 
4745     case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
4746         goya_print_irq_info(hdev, event_type, false);
4747         goya_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
4748         if (hdev->hard_reset_on_fw_events)
4749             hl_device_reset(hdev, HL_DRV_RESET_HARD);
4750         else
4751             hl_fw_unmask_irq(hdev, event_type);
4752         break;
4753 
4754     default:
4755         dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
4756                 event_type);
4757         break;
4758     }
4759 }
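
/*
 * The switch above sorts events into three classes: fatal errors (ECC,
 * AXI, GIC500 and friends) are printed and, when hard_reset_on_fw_events
 * is set, escalate to a hard reset; recoverable engine and monitor events
 * are printed and then re-enabled via goya_unmask_irq(); and the four
 * clock-change events are forwarded to goya_print_clk_change_info().
 */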
4760 
4761 void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
4762 {
4763     struct goya_device *goya = hdev->asic_specific;
4764 
4765     if (aggregate) {
4766         *size = (u32) sizeof(goya->events_stat_aggregate);
4767         return goya->events_stat_aggregate;
4768     }
4769 
4770     *size = (u32) sizeof(goya->events_stat);
4771     return goya->events_stat;
4772 }
4773 
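/*
 * goya_memset_device_memory() fills a device memory range by building a
 * single kernel CB of LIN_DMA packets with the MEMSET bit set, each
 * covering at most 2GB. For example, a 5GB range becomes three packets of
 * 2GB, 2GB and 1GB. The CB also reserves room for one trailing MSG_PROT
 * packet, which is presumably filled in by the QMAN0 submission path
 * (goya_send_job_on_qman0(), defined elsewhere in this file). Note that
 * 'size' may wrap below zero after the last iteration; that is harmless
 * because the packet counter, not 'size', terminates the loop.
 */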
4774 static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
4775                 u64 val, bool is_dram)
4776 {
4777     struct packet_lin_dma *lin_dma_pkt;
4778     struct hl_cs_job *job;
4779     u32 cb_size, ctl;
4780     struct hl_cb *cb;
4781     int rc, lin_dma_pkts_cnt;
4782 
4783     lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
4784     cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
4785                         sizeof(struct packet_msg_prot);
4786     cb = hl_cb_kernel_create(hdev, cb_size, false);
4787     if (!cb)
4788         return -ENOMEM;
4789 
4790     lin_dma_pkt = cb->kernel_address;
4791 
4792     do {
4793         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4794 
4795         ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
4796                 (1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
4797                 (1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
4798                 (1 << GOYA_PKT_CTL_RB_SHIFT) |
4799                 (1 << GOYA_PKT_CTL_MB_SHIFT));
4800         ctl |= (is_dram ? HL_DMA_HOST_TO_DRAM : HL_DMA_HOST_TO_SRAM) <<
4801                 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
4802         lin_dma_pkt->ctl = cpu_to_le32(ctl);
4803 
4804         lin_dma_pkt->src_addr = cpu_to_le64(val);
4805         lin_dma_pkt->dst_addr = cpu_to_le64(addr);
4806         if (lin_dma_pkts_cnt > 1)
4807             lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
4808         else
4809             lin_dma_pkt->tsize = cpu_to_le32(size);
4810 
4811         size -= SZ_2G;
4812         addr += SZ_2G;
4813         lin_dma_pkt++;
4814     } while (--lin_dma_pkts_cnt);
4815 
4816     job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4817     if (!job) {
4818         dev_err(hdev->dev, "Failed to allocate a new job\n");
4819         rc = -ENOMEM;
4820         goto release_cb;
4821     }
4822 
4823     job->id = 0;
4824     job->user_cb = cb;
4825     atomic_inc(&job->user_cb->cs_cnt);
4826     job->user_cb_size = cb_size;
4827     job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
4828     job->patched_cb = job->user_cb;
4829     job->job_cb_size = job->user_cb_size;
4830 
4831     hl_debugfs_add_job(hdev, job);
4832 
4833     rc = goya_send_job_on_qman0(hdev, job);
4834 
4835     hl_debugfs_remove_job(hdev, job);
4836     kfree(job);
4837     atomic_dec(&cb->cs_cnt);
4838 
4839 release_cb:
4840     hl_cb_put(cb);
4841     hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
4842 
4843     return rc;
4844 }
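
/*
 * The memset CB above is wrapped in a short-lived hl_cs_job and submitted
 * synchronously on GOYA_QUEUE_ID_DMA_0; the cs_cnt reference taken on the
 * CB keeps it busy for the duration of the submission and is dropped
 * before the CB is released and destroyed.
 */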
4845 
4846 int goya_context_switch(struct hl_device *hdev, u32 asid)
4847 {
4848     struct asic_fixed_properties *prop = &hdev->asic_prop;
4849     u64 addr = prop->sram_base_address, sob_addr;
4850     u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
4851     u64 val = 0x7777777777777777ull;
4852     int rc, dma_id;
4853     u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
4854                     mmDMA_CH_0_WR_COMP_ADDR_LO;
4855 
4856     rc = goya_memset_device_memory(hdev, addr, size, val, false);
4857     if (rc) {
4858         dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4859         return rc;
4860     }
4861 
4862     /* we need to reset registers that the user is allowed to change */
4863     sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
4864     WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));
4865 
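    /*
     * channel_off is the register-block stride between two consecutive
     * DMA channels, derived from the address of the same register in
     * channels 1 and 0, so channel N's copy of a register lives at
     * base + N * channel_off.
     */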
4866     for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
4867         sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
4868                             (dma_id - 1) * 4;
4869         WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
4870                         lower_32_bits(sob_addr));
4871     }
4872 
4873     WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
4874 
4875     goya_clear_sm_regs(hdev);
4876 
4877     return 0;
4878 }
4879 
4880 static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
4881 {
4882     struct asic_fixed_properties *prop = &hdev->asic_prop;
4883     struct goya_device *goya = hdev->asic_specific;
4884     u64 addr = prop->mmu_pgt_addr;
4885     u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
4886             MMU_CACHE_MNG_SIZE;
4887 
4888     if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4889         return 0;
4890 
4891     return goya_memset_device_memory(hdev, addr, size, 0, true);
4892 }
4893 
4894 static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
4895 {
4896     struct goya_device *goya = hdev->asic_specific;
4897     u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
4898     u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE;
4899     u64 val = 0x9999999999999999ull;
4900 
4901     if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4902         return 0;
4903 
4904     return goya_memset_device_memory(hdev, addr, size, val, true);
4905 }
4906 
4907 static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
4908 {
4909     struct asic_fixed_properties *prop = &hdev->asic_prop;
4910     struct goya_device *goya = hdev->asic_specific;
4911     s64 off, cpu_off;
4912     int rc;
4913 
4914     if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4915         return 0;
4916 
4917     for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
4918         rc = hl_mmu_map_page(hdev->kernel_ctx,
4919             prop->dram_base_address + off,
4920             prop->dram_base_address + off, PAGE_SIZE_2MB,
4921             (off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
4922         if (rc) {
4923             dev_err(hdev->dev, "Map failed for address 0x%llx\n",
4924                 prop->dram_base_address + off);
4925             goto unmap;
4926         }
4927     }
4928 
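    /*
     * Map the CPU-accessible DMA region with a single 2MB page when its
     * physical address happens to be 2MB-aligned; otherwise cover the
     * same SZ_2M range with 4KB pages.
     */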
4929     if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
4930         rc = hl_mmu_map_page(hdev->kernel_ctx,
4931             VA_CPU_ACCESSIBLE_MEM_ADDR,
4932             hdev->cpu_accessible_dma_address,
4933             PAGE_SIZE_2MB, true);
4934 
4935         if (rc) {
4936             dev_err(hdev->dev,
4937                 "Map failed for CPU accessible memory\n");
4938             off -= PAGE_SIZE_2MB;
4939             goto unmap;
4940         }
4941     } else {
4942         for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
4943             rc = hl_mmu_map_page(hdev->kernel_ctx,
4944                 VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
4945                 hdev->cpu_accessible_dma_address + cpu_off,
4946                 PAGE_SIZE_4KB, true);
4947             if (rc) {
4948                 dev_err(hdev->dev,
4949                     "Map failed for CPU accessible memory\n");
4950                 cpu_off -= PAGE_SIZE_4KB;
                     /* step 'off' back as well, since the fall-through
                      * from unmap_cpu into unmap expects 'off' to point
                      * at the last DRAM page actually mapped
                      */
                     off -= PAGE_SIZE_2MB;
4951                 goto unmap_cpu;
4952             }
4953         }
4954     }
4955 
4956     goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
4957     goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
4958     WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
4959     WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);
4960 
4961     /* Make sure configuration is flushed to device */
4962     RREG32(mmCPU_IF_AWUSER_OVR_EN);
4963 
4964     goya->device_cpu_mmu_mappings_done = true;
4965 
4966     return 0;
4967 
4968 unmap_cpu:
4969     for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
4970         if (hl_mmu_unmap_page(hdev->kernel_ctx,
4971                 VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
4972                 PAGE_SIZE_4KB, true))
4973             dev_warn_ratelimited(hdev->dev,
4974                 "failed to unmap address 0x%llx\n",
4975                 VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
4976 unmap:
4977     for (; off >= 0 ; off -= PAGE_SIZE_2MB)
4978         if (hl_mmu_unmap_page(hdev->kernel_ctx,
4979                 prop->dram_base_address + off, PAGE_SIZE_2MB,
4980                 true))
4981             dev_warn_ratelimited(hdev->dev,
4982                 "failed to unmap address 0x%llx\n",
4983                 prop->dram_base_address + off);
4984 
4985     return rc;
4986 }
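
/*
 * Note that 'off' and 'cpu_off' are deliberately signed (s64): the unwind
 * loops above step them back below zero, so the ">= 0" guards terminate
 * exactly after the last successfully mapped page has been unmapped.
 */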
4987 
4988 void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
4989 {
4990     struct asic_fixed_properties *prop = &hdev->asic_prop;
4991     struct goya_device *goya = hdev->asic_specific;
4992     u32 off, cpu_off;
4993 
4994     if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4995         return;
4996 
4997     if (!goya->device_cpu_mmu_mappings_done)
4998         return;
4999 
5000     WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
5001     WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);
5002 
5003     if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
5004         if (hl_mmu_unmap_page(hdev->kernel_ctx,
5005                 VA_CPU_ACCESSIBLE_MEM_ADDR,
5006                 PAGE_SIZE_2MB, true))
5007             dev_warn(hdev->dev,
5008                 "Failed to unmap CPU accessible memory\n");
5009     } else {
5010         for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
5011             if (hl_mmu_unmap_page(hdev->kernel_ctx,
5012                     VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
5013                     PAGE_SIZE_4KB,
5014                     (cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
5015                 dev_warn_ratelimited(hdev->dev,
5016                     "failed to unmap address 0x%llx\n",
5017                     VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
5018     }
5019 
5020     for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
5021         if (hl_mmu_unmap_page(hdev->kernel_ctx,
5022                 prop->dram_base_address + off, PAGE_SIZE_2MB,
5023                 (off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
5024             dev_warn_ratelimited(hdev->dev,
5025                     "Failed to unmap address 0x%llx\n",
5026                     prop->dram_base_address + off);
5027 
5028     goya->device_cpu_mmu_mappings_done = false;
5029 }
5030 
5031 static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
5032 {
5033     struct goya_device *goya = hdev->asic_specific;
5034     int i;
5035 
5036     if (!(goya->hw_cap_initialized & HW_CAP_MMU))
5037         return;
5038 
5039     if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
5040         dev_crit(hdev->dev, "asid %u is too big\n", asid);
5041         return;
5042     }
5043 
5044     /* zero the MMBP and ASID bits and then set the ASID */
5045     for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
5046         goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
5047 }
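
/*
 * goya_mmu_prepare_reg() (defined elsewhere in this file) performs the
 * update described by the comment above. Conceptually it is a
 * read-modify-write of each secure-props register; as a sketch, not the
 * literal implementation:
 *
 *     reg = (reg & ~(MMBP_BIT | ASID_MASK)) | asid;
 */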
5048 
5049 static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
5050                     u32 flags)
5051 {
5052     struct goya_device *goya = hdev->asic_specific;
5053     u32 status, timeout_usec;
5054     int rc;
5055 
5056     if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
5057         hdev->reset_info.hard_reset_pending)
5058         return 0;
5059 
5060     /* there is no need for an L1-only invalidation in Goya */
5061     if (!is_hard)
5062         return 0;
5063 
5064     if (hdev->pldm)
5065         timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
5066     else
5067         timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5068 
5069     /* L0 & L1 invalidation */
5070     WREG32(mmSTLB_INV_ALL_START, 1);
5071 
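    /*
     * The hardware clears the START bit once the invalidation completes,
     * so poll (at 1000us intervals) until the register reads back zero.
     */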
5072     rc = hl_poll_timeout(
5073         hdev,
5074         mmSTLB_INV_ALL_START,
5075         status,
5076         !status,
5077         1000,
5078         timeout_usec);
5079 
5080     return rc;
5081 }
5082 
5083 static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
5084                         bool is_hard, u32 flags,
5085                         u32 asid, u64 va, u64 size)
5086 {
5087     /* Treat as invalidate all because there is no range invalidation
5088      * in Goya
5089      */
5090     return hl_mmu_invalidate_cache(hdev, is_hard, flags);
5091 }
5092 
5093 int goya_send_heartbeat(struct hl_device *hdev)
5094 {
5095     struct goya_device *goya = hdev->asic_specific;
5096 
5097     if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5098         return 0;
5099 
5100     return hl_fw_send_heartbeat(hdev);
5101 }
5102 
5103 int goya_cpucp_info_get(struct hl_device *hdev)
5104 {
5105     struct goya_device *goya = hdev->asic_specific;
5106     struct asic_fixed_properties *prop = &hdev->asic_prop;
5107     u64 dram_size;
5108     int rc;
5109 
5110     if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5111         return 0;
5112 
5113     rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
5114                     mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
5115                     mmCPU_BOOT_ERR1);
5116     if (rc)
5117         return rc;
5118 
5119     dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
5120     if (dram_size) {
5121         if ((!is_power_of_2(dram_size)) ||
5122                 (dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
5123             dev_err(hdev->dev,
5124                 "F/W reported invalid DRAM size %llu. Trying to use default size\n",
5125                 dram_size);
5126             dram_size = DRAM_PHYS_DEFAULT_SIZE;
5127         }
5128 
5129         prop->dram_size = dram_size;
5130         prop->dram_end_address = prop->dram_base_address + dram_size;
5131     }
5132 
         /* use strscpy() rather than strncpy() to guarantee NUL-termination */
5133     if (!strlen(prop->cpucp_info.card_name))
5134         strscpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
5135                 CARD_NAME_MAX_LEN);
5136 
5137     return 0;
5138 }
5139 
5140 static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
5141                     u8 mask_len, struct seq_file *s)
5142 {
5143     const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
5144     const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
5145     unsigned long *mask = (unsigned long *)mask_arr;
5146     u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
5147         mme_arch_sts;
5148     bool is_idle = true, is_eng_idle;
5149     u64 offset;
5150     int i;
5151 
5152     if (s)
5153         seq_puts(s, "\nDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0\n"
5154                 "---  -------  ------------  -------------\n");
5155 
5156     offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;
5157 
5158     for (i = 0 ; i < DMA_MAX_NUM ; i++) {
5159         qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
5160         dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
5161         is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
5162                 IS_DMA_IDLE(dma_core_sts0);
5163         is_idle &= is_eng_idle;
5164 
5165         if (mask && !is_eng_idle)
5166             set_bit(GOYA_ENGINE_ID_DMA_0 + i, mask);
5167         if (s)
5168             seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
5169                     qm_glbl_sts0, dma_core_sts0);
5170     }
5171 
5172     if (s)
5173         seq_puts(s,
5174             "\nTPC  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  CFG_STATUS\n"
5175             "---  -------  ------------  --------------  ----------\n");
5176 
5177     offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;
5178 
5179     for (i = 0 ; i < TPC_MAX_NUM ; i++) {
5180         qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
5181         cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
5182         tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
5183         is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
5184                 IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
5185                 IS_TPC_IDLE(tpc_cfg_sts);
5186         is_idle &= is_eng_idle;
5187 
5188         if (mask && !is_eng_idle)
5189             set_bit(GOYA_ENGINE_ID_TPC_0 + i, mask);
5190         if (s)
5191             seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
5192                 qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
5193     }
5194 
5195     if (s)
5196         seq_puts(s,
5197             "\nMME  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  ARCH_STATUS\n"
5198             "---  -------  ------------  --------------  -----------\n");
5199 
5200     qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
5201     cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
5202     mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
5203     is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
5204             IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
5205             IS_MME_IDLE(mme_arch_sts);
5206     is_idle &= is_eng_idle;
5207 
5208     if (mask && !is_eng_idle)
5209         set_bit(GOYA_ENGINE_ID_MME_0, mask);
5210     if (s) {
5211         seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
5212                 cmdq_glbl_sts0, mme_arch_sts);
5213         seq_puts(s, "\n");
5214     }
5215 
5216     return is_idle;
5217 }
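
/*
 * On return, 'mask_arr' (when supplied) has one bit set per busy engine,
 * using the GOYA_ENGINE_ID_* numbering, and the result is true only if
 * every DMA channel, every TPC and the MME are all idle; the optional
 * seq_file output is informational, for the debugfs status dump.
 */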
5218 
5219 static void goya_hw_queues_lock(struct hl_device *hdev)
5220     __acquires(&goya->hw_queues_lock)
5221 {
5222     struct goya_device *goya = hdev->asic_specific;
5223 
5224     spin_lock(&goya->hw_queues_lock);
5225 }
5226 
5227 static void goya_hw_queues_unlock(struct hl_device *hdev)
5228     __releases(&goya->hw_queues_lock)
5229 {
5230     struct goya_device *goya = hdev->asic_specific;
5231 
5232     spin_unlock(&goya->hw_queues_lock);
5233 }
5234 
5235 static u32 goya_get_pci_id(struct hl_device *hdev)
5236 {
5237     return hdev->pdev->device;
5238 }
5239 
5240 static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
5241                 size_t max_size)
5242 {
5243     struct goya_device *goya = hdev->asic_specific;
5244 
5245     if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5246         return 0;
5247 
5248     return hl_fw_get_eeprom_data(hdev, data, max_size);
5249 }
5250 
5251 static void goya_cpu_init_scrambler_dram(struct hl_device *hdev)
5252 {
5253 
5254 }
5255 
5256 static int goya_ctx_init(struct hl_ctx *ctx)
5257 {
5258     if (ctx->asid != HL_KERNEL_ASID_ID)
5259         goya_mmu_prepare(ctx->hdev, ctx->asid);
5260 
5261     return 0;
5262 }
5263 
5264 static int goya_pre_schedule_cs(struct hl_cs *cs)
5265 {
5266     return 0;
5267 }
5268 
5269 u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
5270 {
5271     return cq_idx;
5272 }
5273 
5274 static u32 goya_get_signal_cb_size(struct hl_device *hdev)
5275 {
5276     return 0;
5277 }
5278 
5279 static u32 goya_get_wait_cb_size(struct hl_device *hdev)
5280 {
5281     return 0;
5282 }
5283 
5284 static u32 goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
5285                 u32 size, bool eb)
5286 {
5287     return 0;
5288 }
5289 
5290 static u32 goya_gen_wait_cb(struct hl_device *hdev,
5291         struct hl_gen_wait_properties *prop)
5292 {
5293     return 0;
5294 }
5295 
5296 static void goya_reset_sob(struct hl_device *hdev, void *data)
5297 {
5298 
5299 }
5300 
5301 static void goya_reset_sob_group(struct hl_device *hdev, u16 sob_group)
5302 {
5303 
5304 }
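
/*
 * Goya has no sync-stream support, so the signal/wait CB hooks above are
 * stubs: the size queries report zero-byte CBs, the generators emit
 * nothing, and the SOB reset callbacks are no-ops.
 */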
5305 
5306 u64 goya_get_device_time(struct hl_device *hdev)
5307 {
5308     u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
5309 
5310     return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
5311 }
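
/*
 * The two timestamp halves are read with separate register accesses, so a
 * rollover of the low half between the reads could in principle yield a
 * torn value; the classic mitigation is a high-low-high re-read loop.
 */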
5312 
5313 static int goya_collective_wait_init_cs(struct hl_cs *cs)
5314 {
5315     return 0;
5316 }
5317 
5318 static int goya_collective_wait_create_jobs(struct hl_device *hdev,
5319         struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
5320         u32 collective_engine_id, u32 encaps_signal_offset)
5321 {
5322     return -EINVAL;
5323 }
5324 
5325 static void goya_ctx_fini(struct hl_ctx *ctx)
5326 {
5327 
5328 }
5329 
5330 static int goya_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
5331             u32 *block_size, u32 *block_id)
5332 {
5333     return -EPERM;
5334 }
5335 
5336 static int goya_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
5337                 u32 block_id, u32 block_size)
5338 {
5339     return -EPERM;
5340 }
5341 
5342 static void goya_enable_events_from_fw(struct hl_device *hdev)
5343 {
5344     WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
5345             GOYA_ASYNC_EVENT_ID_INTS_REGISTER);
5346 }
5347 
5348 static int goya_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
5349 {
5350     return -EINVAL;
5351 }
5352 
5353 static int goya_map_pll_idx_to_fw_idx(u32 pll_idx)
5354 {
5355     switch (pll_idx) {
5356     case HL_GOYA_CPU_PLL: return CPU_PLL;
5357     case HL_GOYA_PCI_PLL: return PCI_PLL;
5358     case HL_GOYA_MME_PLL: return MME_PLL;
5359     case HL_GOYA_TPC_PLL: return TPC_PLL;
5360     case HL_GOYA_IC_PLL: return IC_PLL;
5361     case HL_GOYA_MC_PLL: return MC_PLL;
5362     case HL_GOYA_EMMC_PLL: return EMMC_PLL;
5363     default: return -EINVAL;
5364     }
5365 }
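
/*
 * Translates the HL_GOYA_*_PLL indices exposed at the driver interface
 * into the firmware's PLL numbering; unknown indices yield -EINVAL.
 */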
5366 
5367 static int goya_gen_sync_to_engine_map(struct hl_device *hdev,
5368                 struct hl_sync_to_engine_map *map)
5369 {
5370     /* Not implemented */
5371     return 0;
5372 }
5373 
5374 static int goya_monitor_valid(struct hl_mon_state_dump *mon)
5375 {
5376     /* Not implemented */
5377     return 0;
5378 }
5379 
5380 static int goya_print_single_monitor(char **buf, size_t *size, size_t *offset,
5381                 struct hl_device *hdev,
5382                 struct hl_mon_state_dump *mon)
5383 {
5384     /* Not implemented */
5385     return 0;
5386 }
5387 
5388 
5389 static int goya_print_fences_single_engine(
5390     struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
5391     enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
5392     size_t *size, size_t *offset)
5393 {
5394     /* Not implemented */
5395     return 0;
5396 }
5397 
5398 
5399 static struct hl_state_dump_specs_funcs goya_state_dump_funcs = {
5400     .monitor_valid = goya_monitor_valid,
5401     .print_single_monitor = goya_print_single_monitor,
5402     .gen_sync_to_engine_map = goya_gen_sync_to_engine_map,
5403     .print_fences_single_engine = goya_print_fences_single_engine,
5404 };
5405 
5406 static void goya_state_dump_init(struct hl_device *hdev)
5407 {
5408     /* Not implemented */
5409     hdev->state_dump_specs.props = goya_state_dump_specs_props;
5410     hdev->state_dump_specs.funcs = goya_state_dump_funcs;
5411 }
5412 
5413 static u32 goya_get_sob_addr(struct hl_device *hdev, u32 sob_id)
5414 {
5415     return 0;
5416 }
5417 
5418 static u32 *goya_get_stream_master_qid_arr(void)
5419 {
5420     return NULL;
5421 }
5422 
5423 static int goya_get_monitor_dump(struct hl_device *hdev, void *data)
5424 {
5425     return -EOPNOTSUPP;
5426 }
5427 
5428 static void goya_check_if_razwi_happened(struct hl_device *hdev)
5429 {
5430 }
5431 
5432 static int goya_scrub_device_dram(struct hl_device *hdev, u64 val)
5433 {
5434     return -EOPNOTSUPP;
5435 }
5436 
5437 static const struct hl_asic_funcs goya_funcs = {
5438     .early_init = goya_early_init,
5439     .early_fini = goya_early_fini,
5440     .late_init = goya_late_init,
5441     .late_fini = goya_late_fini,
5442     .sw_init = goya_sw_init,
5443     .sw_fini = goya_sw_fini,
5444     .hw_init = goya_hw_init,
5445     .hw_fini = goya_hw_fini,
5446     .halt_engines = goya_halt_engines,
5447     .suspend = goya_suspend,
5448     .resume = goya_resume,
5449     .mmap = goya_mmap,
5450     .ring_doorbell = goya_ring_doorbell,
5451     .pqe_write = goya_pqe_write,
5452     .asic_dma_alloc_coherent = goya_dma_alloc_coherent,
5453     .asic_dma_free_coherent = goya_dma_free_coherent,
5454     .scrub_device_mem = goya_scrub_device_mem,
5455     .scrub_device_dram = goya_scrub_device_dram,
5456     .get_int_queue_base = goya_get_int_queue_base,
5457     .test_queues = goya_test_queues,
5458     .asic_dma_pool_zalloc = goya_dma_pool_zalloc,
5459     .asic_dma_pool_free = goya_dma_pool_free,
5460     .cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
5461     .cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
5462     .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
5463     .cs_parser = goya_cs_parser,
5464     .asic_dma_map_sgtable = hl_dma_map_sgtable,
5465     .add_end_of_cb_packets = goya_add_end_of_cb_packets,
5466     .update_eq_ci = goya_update_eq_ci,
5467     .context_switch = goya_context_switch,
5468     .restore_phase_topology = goya_restore_phase_topology,
5469     .debugfs_read_dma = goya_debugfs_read_dma,
5470     .add_device_attr = goya_add_device_attr,
5471     .handle_eqe = goya_handle_eqe,
5472     .get_events_stat = goya_get_events_stat,
5473     .read_pte = goya_read_pte,
5474     .write_pte = goya_write_pte,
5475     .mmu_invalidate_cache = goya_mmu_invalidate_cache,
5476     .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
5477     .mmu_prefetch_cache_range = NULL,
5478     .send_heartbeat = goya_send_heartbeat,
5479     .debug_coresight = goya_debug_coresight,
5480     .is_device_idle = goya_is_device_idle,
5481     .non_hard_reset_late_init = goya_non_hard_reset_late_init,
5482     .hw_queues_lock = goya_hw_queues_lock,
5483     .hw_queues_unlock = goya_hw_queues_unlock,
5484     .kdma_lock = NULL,
5485     .kdma_unlock = NULL,
5486     .get_pci_id = goya_get_pci_id,
5487     .get_eeprom_data = goya_get_eeprom_data,
5488     .get_monitor_dump = goya_get_monitor_dump,
5489     .send_cpu_message = goya_send_cpu_message,
5490     .pci_bars_map = goya_pci_bars_map,
5491     .init_iatu = goya_init_iatu,
5492     .rreg = hl_rreg,
5493     .wreg = hl_wreg,
5494     .halt_coresight = goya_halt_coresight,
5495     .ctx_init = goya_ctx_init,
5496     .ctx_fini = goya_ctx_fini,
5497     .pre_schedule_cs = goya_pre_schedule_cs,
5498     .get_queue_id_for_cq = goya_get_queue_id_for_cq,
5499     .load_firmware_to_device = goya_load_firmware_to_device,
5500     .load_boot_fit_to_device = goya_load_boot_fit_to_device,
5501     .get_signal_cb_size = goya_get_signal_cb_size,
5502     .get_wait_cb_size = goya_get_wait_cb_size,
5503     .gen_signal_cb = goya_gen_signal_cb,
5504     .gen_wait_cb = goya_gen_wait_cb,
5505     .reset_sob = goya_reset_sob,
5506     .reset_sob_group = goya_reset_sob_group,
5507     .get_device_time = goya_get_device_time,
5508     .pb_print_security_errors = NULL,
5509     .collective_wait_init_cs = goya_collective_wait_init_cs,
5510     .collective_wait_create_jobs = goya_collective_wait_create_jobs,
5511     .get_dec_base_addr = NULL,
5512     .scramble_addr = hl_mmu_scramble_addr,
5513     .descramble_addr = hl_mmu_descramble_addr,
5514     .ack_protection_bits_errors = goya_ack_protection_bits_errors,
5515     .get_hw_block_id = goya_get_hw_block_id,
5516     .hw_block_mmap = goya_block_mmap,
5517     .enable_events_from_fw = goya_enable_events_from_fw,
5518     .ack_mmu_errors = goya_ack_mmu_page_fault_or_access_error,
5519     .map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx,
5520     .init_firmware_preload_params = goya_init_firmware_preload_params,
5521     .init_firmware_loader = goya_init_firmware_loader,
5522     .init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram,
5523     .state_dump_init = goya_state_dump_init,
5524     .get_sob_addr = goya_get_sob_addr,
5525     .set_pci_memory_regions = goya_set_pci_memory_regions,
5526     .get_stream_master_qid_arr = goya_get_stream_master_qid_arr,
5527     .check_if_razwi_happened = goya_check_if_razwi_happened,
5528     .mmu_get_real_page_size = hl_mmu_get_real_page_size,
5529     .access_dev_mem = hl_access_dev_mem,
5530     .set_dram_bar_base = goya_set_ddr_bar_base,
5531 };
5532 
5533 /*
5534  * goya_set_asic_funcs - set Goya function pointers
5535  *
5536  * @hdev: pointer to hl_device structure
5537  */
5539 void goya_set_asic_funcs(struct hl_device *hdev)
5540 {
5541     hdev->asic_funcs = &goya_funcs;
5542 }