0001
0002
0003
0004
0005
0006
0007
0008 #include "goyaP.h"
0009 #include "../include/hw_ip/mmu/mmu_general.h"
0010 #include "../include/hw_ip/mmu/mmu_v1_0.h"
0011 #include "../include/goya/asic_reg/goya_masks.h"
0012 #include "../include/goya/goya_reg_map.h"
0013
0014 #include <linux/pci.h>
0015 #include <linux/hwmon.h>
0016 #include <linux/iommu.h>
0017 #include <linux/seq_file.h>
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067
0068
0069
0070
0071
0072
0073 #define GOYA_BOOT_FIT_FILE "habanalabs/goya/goya-boot-fit.itb"
0074 #define GOYA_LINUX_FW_FILE "habanalabs/goya/goya-fit.itb"
0075
0076 #define GOYA_MMU_REGS_NUM 63
0077
0078 #define GOYA_DMA_POOL_BLK_SIZE 0x100
0079
0080 #define GOYA_RESET_TIMEOUT_MSEC 500
0081 #define GOYA_PLDM_RESET_TIMEOUT_MSEC 20000
0082 #define GOYA_RESET_WAIT_MSEC 1
0083 #define GOYA_CPU_RESET_WAIT_MSEC 100
0084 #define GOYA_PLDM_RESET_WAIT_MSEC 1000
0085 #define GOYA_TEST_QUEUE_WAIT_USEC 100000
0086 #define GOYA_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
0087 #define GOYA_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
0088 #define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC 1000000
0089 #define GOYA_MSG_TO_CPU_TIMEOUT_USEC 4000000
0090 #define GOYA_WAIT_FOR_BL_TIMEOUT_USEC 15000000
0091
0092 #define GOYA_QMAN0_FENCE_VAL 0xD169B243
0093
0094 #define GOYA_MAX_STRING_LEN 20
0095
0096 #define GOYA_CB_POOL_CB_CNT 512
0097 #define GOYA_CB_POOL_CB_SIZE 0x20000
0098
0099 #define IS_QM_IDLE(engine, qm_glbl_sts0) \
0100 (((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
0101 #define IS_DMA_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(DMA, qm_glbl_sts0)
0102 #define IS_TPC_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(TPC, qm_glbl_sts0)
0103 #define IS_MME_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(MME, qm_glbl_sts0)
0104
0105 #define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
0106 (((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
0107 engine##_CMDQ_IDLE_MASK)
0108 #define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
0109 IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
0110 #define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
0111 IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)
0112
0113 #define IS_DMA_IDLE(dma_core_sts0) \
0114 !((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)
0115
0116 #define IS_TPC_IDLE(tpc_cfg_sts) \
0117 (((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)
0118
0119 #define IS_MME_IDLE(mme_arch_sts) \
0120 (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
0121
/* Human-readable names for the MSI-X vectors: 5 completion queues + CPU EQ */
static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
		"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
		"goya cq 4", "goya cpu eq"
};
0126
/* Size in bytes of each queue packet type, indexed by packet id */
static u16 goya_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop)
};
0139
0140 static inline bool validate_packet_id(enum packet_id id)
0141 {
0142 switch (id) {
0143 case PACKET_WREG_32:
0144 case PACKET_WREG_BULK:
0145 case PACKET_MSG_LONG:
0146 case PACKET_MSG_SHORT:
0147 case PACKET_CP_DMA:
0148 case PACKET_MSG_PROT:
0149 case PACKET_FENCE:
0150 case PACKET_LIN_DMA:
0151 case PACKET_NOP:
0152 case PACKET_STOP:
0153 return true;
0154 default:
0155 return false;
0156 }
0157 }
0158
/*
 * Registers that carry an ASID and therefore must be re-programmed by
 * goya_mmu_prepare() whenever the MMU is prepared for a new context:
 * DMA QMANs, TPC QMANs/CMDQs/CFG users, MME QMAN/CMDQ/SB-WB controls,
 * and the PCIe wrap PSOC AR/AW user registers.
 */
static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
	mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
	mmTPC0_QM_GLBL_SECURE_PROPS,
	mmTPC0_QM_GLBL_NON_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC0_CFG_ARUSER,
	mmTPC0_CFG_AWUSER,
	mmTPC1_QM_GLBL_SECURE_PROPS,
	mmTPC1_QM_GLBL_NON_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC1_CFG_ARUSER,
	mmTPC1_CFG_AWUSER,
	mmTPC2_QM_GLBL_SECURE_PROPS,
	mmTPC2_QM_GLBL_NON_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC2_CFG_ARUSER,
	mmTPC2_CFG_AWUSER,
	mmTPC3_QM_GLBL_SECURE_PROPS,
	mmTPC3_QM_GLBL_NON_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC3_CFG_ARUSER,
	mmTPC3_CFG_AWUSER,
	mmTPC4_QM_GLBL_SECURE_PROPS,
	mmTPC4_QM_GLBL_NON_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC4_CFG_ARUSER,
	mmTPC4_CFG_AWUSER,
	mmTPC5_QM_GLBL_SECURE_PROPS,
	mmTPC5_QM_GLBL_NON_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC5_CFG_ARUSER,
	mmTPC5_CFG_AWUSER,
	mmTPC6_QM_GLBL_SECURE_PROPS,
	mmTPC6_QM_GLBL_NON_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC6_CFG_ARUSER,
	mmTPC6_CFG_AWUSER,
	mmTPC7_QM_GLBL_SECURE_PROPS,
	mmTPC7_QM_GLBL_NON_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC7_CFG_ARUSER,
	mmTPC7_CFG_AWUSER,
	mmMME_QM_GLBL_SECURE_PROPS,
	mmMME_QM_GLBL_NON_SECURE_PROPS,
	mmMME_CMDQ_GLBL_SECURE_PROPS,
	mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
	mmMME_SBA_CONTROL_DATA,
	mmMME_SBB_CONTROL_DATA,
	mmMME_SBC_CONTROL_DATA,
	mmMME_WBC_CONTROL_DATA,
	mmPCIE_WRAP_PSOC_ARUSER,
	mmPCIE_WRAP_PSOC_AWUSER
};
0224
/*
 * All async event ids the driver handles/unmasks: ECC errors, SRAM
 * errors, PLL/decode errors, engine (TPC/MME/DMA) QM/CMDQ/CH errors,
 * BMON-SPMU events and power/thermal environment events.
 */
static u32 goya_all_events[] = {
	GOYA_ASYNC_EVENT_ID_PCIE_IF,
	GOYA_ASYNC_EVENT_ID_TPC0_ECC,
	GOYA_ASYNC_EVENT_ID_TPC1_ECC,
	GOYA_ASYNC_EVENT_ID_TPC2_ECC,
	GOYA_ASYNC_EVENT_ID_TPC3_ECC,
	GOYA_ASYNC_EVENT_ID_TPC4_ECC,
	GOYA_ASYNC_EVENT_ID_TPC5_ECC,
	GOYA_ASYNC_EVENT_ID_TPC6_ECC,
	GOYA_ASYNC_EVENT_ID_TPC7_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
	GOYA_ASYNC_EVENT_ID_MMU_ECC,
	GOYA_ASYNC_EVENT_ID_DMA_MACRO,
	GOYA_ASYNC_EVENT_ID_DMA_ECC,
	GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_MEM,
	GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
	GOYA_ASYNC_EVENT_ID_SRAM0,
	GOYA_ASYNC_EVENT_ID_SRAM1,
	GOYA_ASYNC_EVENT_ID_SRAM2,
	GOYA_ASYNC_EVENT_ID_SRAM3,
	GOYA_ASYNC_EVENT_ID_SRAM4,
	GOYA_ASYNC_EVENT_ID_SRAM5,
	GOYA_ASYNC_EVENT_ID_SRAM6,
	GOYA_ASYNC_EVENT_ID_SRAM7,
	GOYA_ASYNC_EVENT_ID_SRAM8,
	GOYA_ASYNC_EVENT_ID_SRAM9,
	GOYA_ASYNC_EVENT_ID_SRAM10,
	GOYA_ASYNC_EVENT_ID_SRAM11,
	GOYA_ASYNC_EVENT_ID_SRAM12,
	GOYA_ASYNC_EVENT_ID_SRAM13,
	GOYA_ASYNC_EVENT_ID_SRAM14,
	GOYA_ASYNC_EVENT_ID_SRAM15,
	GOYA_ASYNC_EVENT_ID_SRAM16,
	GOYA_ASYNC_EVENT_ID_SRAM17,
	GOYA_ASYNC_EVENT_ID_SRAM18,
	GOYA_ASYNC_EVENT_ID_SRAM19,
	GOYA_ASYNC_EVENT_ID_SRAM20,
	GOYA_ASYNC_EVENT_ID_SRAM21,
	GOYA_ASYNC_EVENT_ID_SRAM22,
	GOYA_ASYNC_EVENT_ID_SRAM23,
	GOYA_ASYNC_EVENT_ID_SRAM24,
	GOYA_ASYNC_EVENT_ID_SRAM25,
	GOYA_ASYNC_EVENT_ID_SRAM26,
	GOYA_ASYNC_EVENT_ID_SRAM27,
	GOYA_ASYNC_EVENT_ID_SRAM28,
	GOYA_ASYNC_EVENT_ID_SRAM29,
	GOYA_ASYNC_EVENT_ID_GIC500,
	GOYA_ASYNC_EVENT_ID_PLL0,
	GOYA_ASYNC_EVENT_ID_PLL1,
	GOYA_ASYNC_EVENT_ID_PLL3,
	GOYA_ASYNC_EVENT_ID_PLL4,
	GOYA_ASYNC_EVENT_ID_PLL5,
	GOYA_ASYNC_EVENT_ID_PLL6,
	GOYA_ASYNC_EVENT_ID_AXI_ECC,
	GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
	GOYA_ASYNC_EVENT_ID_PCIE_DEC,
	GOYA_ASYNC_EVENT_ID_TPC0_DEC,
	GOYA_ASYNC_EVENT_ID_TPC1_DEC,
	GOYA_ASYNC_EVENT_ID_TPC2_DEC,
	GOYA_ASYNC_EVENT_ID_TPC3_DEC,
	GOYA_ASYNC_EVENT_ID_TPC4_DEC,
	GOYA_ASYNC_EVENT_ID_TPC5_DEC,
	GOYA_ASYNC_EVENT_ID_TPC6_DEC,
	GOYA_ASYNC_EVENT_ID_TPC7_DEC,
	GOYA_ASYNC_EVENT_ID_MME_WACS,
	GOYA_ASYNC_EVENT_ID_MME_WACSD,
	GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
	GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
	GOYA_ASYNC_EVENT_ID_PSOC,
	GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC0_QM,
	GOYA_ASYNC_EVENT_ID_TPC1_QM,
	GOYA_ASYNC_EVENT_ID_TPC2_QM,
	GOYA_ASYNC_EVENT_ID_TPC3_QM,
	GOYA_ASYNC_EVENT_ID_TPC4_QM,
	GOYA_ASYNC_EVENT_ID_TPC5_QM,
	GOYA_ASYNC_EVENT_ID_TPC6_QM,
	GOYA_ASYNC_EVENT_ID_TPC7_QM,
	GOYA_ASYNC_EVENT_ID_MME_QM,
	GOYA_ASYNC_EVENT_ID_MME_CMDQ,
	GOYA_ASYNC_EVENT_ID_DMA0_QM,
	GOYA_ASYNC_EVENT_ID_DMA1_QM,
	GOYA_ASYNC_EVENT_ID_DMA2_QM,
	GOYA_ASYNC_EVENT_ID_DMA3_QM,
	GOYA_ASYNC_EVENT_ID_DMA4_QM,
	GOYA_ASYNC_EVENT_ID_DMA0_CH,
	GOYA_ASYNC_EVENT_ID_DMA1_CH,
	GOYA_ASYNC_EVENT_ID_DMA2_CH,
	GOYA_ASYNC_EVENT_ID_DMA3_CH,
	GOYA_ASYNC_EVENT_ID_DMA4_CH,
	GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
};
0352
/* State-dump spec properties, indexed by enum up to SP_MAX; zeroed at init */
static s64 goya_state_dump_specs_props[SP_MAX] = {0};
0354
0355 static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
0356 static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
0357 static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
0358 static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
0359
0360 int goya_set_fixed_properties(struct hl_device *hdev)
0361 {
0362 struct asic_fixed_properties *prop = &hdev->asic_prop;
0363 int i;
0364
0365 prop->max_queues = GOYA_QUEUE_ID_SIZE;
0366 prop->hw_queues_props = kcalloc(prop->max_queues,
0367 sizeof(struct hw_queue_properties),
0368 GFP_KERNEL);
0369
0370 if (!prop->hw_queues_props)
0371 return -ENOMEM;
0372
0373 for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
0374 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
0375 prop->hw_queues_props[i].driver_only = 0;
0376 prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
0377 }
0378
0379 for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
0380 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
0381 prop->hw_queues_props[i].driver_only = 1;
0382 prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
0383 }
0384
0385 for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
0386 NUMBER_OF_INT_HW_QUEUES; i++) {
0387 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
0388 prop->hw_queues_props[i].driver_only = 0;
0389 prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER;
0390 }
0391
0392 prop->cfg_base_address = CFG_BASE;
0393 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
0394 prop->host_base_address = HOST_PHYS_BASE;
0395 prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
0396 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
0397 prop->completion_mode = HL_COMPLETION_MODE_JOB;
0398 prop->dram_base_address = DRAM_PHYS_BASE;
0399 prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
0400 prop->dram_end_address = prop->dram_base_address + prop->dram_size;
0401 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
0402
0403 prop->sram_base_address = SRAM_BASE_ADDR;
0404 prop->sram_size = SRAM_SIZE;
0405 prop->sram_end_address = prop->sram_base_address + prop->sram_size;
0406 prop->sram_user_base_address = prop->sram_base_address +
0407 SRAM_USER_BASE_OFFSET;
0408
0409 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
0410 prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR;
0411 if (hdev->pldm)
0412 prop->mmu_pgt_size = 0x800000;
0413 else
0414 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
0415 prop->mmu_pte_size = HL_PTE_SIZE;
0416 prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
0417 prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
0418 prop->dram_page_size = PAGE_SIZE_2MB;
0419 prop->device_mem_alloc_default_page_size = prop->dram_page_size;
0420 prop->dram_supports_virtual_memory = true;
0421
0422 prop->dmmu.hop_shifts[MMU_HOP0] = MMU_V1_0_HOP0_SHIFT;
0423 prop->dmmu.hop_shifts[MMU_HOP1] = MMU_V1_0_HOP1_SHIFT;
0424 prop->dmmu.hop_shifts[MMU_HOP2] = MMU_V1_0_HOP2_SHIFT;
0425 prop->dmmu.hop_shifts[MMU_HOP3] = MMU_V1_0_HOP3_SHIFT;
0426 prop->dmmu.hop_shifts[MMU_HOP4] = MMU_V1_0_HOP4_SHIFT;
0427 prop->dmmu.hop_masks[MMU_HOP0] = MMU_V1_0_HOP0_MASK;
0428 prop->dmmu.hop_masks[MMU_HOP1] = MMU_V1_0_HOP1_MASK;
0429 prop->dmmu.hop_masks[MMU_HOP2] = MMU_V1_0_HOP2_MASK;
0430 prop->dmmu.hop_masks[MMU_HOP3] = MMU_V1_0_HOP3_MASK;
0431 prop->dmmu.hop_masks[MMU_HOP4] = MMU_V1_0_HOP4_MASK;
0432 prop->dmmu.start_addr = VA_DDR_SPACE_START;
0433 prop->dmmu.end_addr = VA_DDR_SPACE_END;
0434 prop->dmmu.page_size = PAGE_SIZE_2MB;
0435 prop->dmmu.num_hops = MMU_ARCH_5_HOPS;
0436 prop->dmmu.last_mask = LAST_MASK;
0437
0438 prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
0439 prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
0440
0441
0442 memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
0443 prop->pmmu.start_addr = VA_HOST_SPACE_START;
0444 prop->pmmu.end_addr = VA_HOST_SPACE_END;
0445 prop->pmmu.page_size = PAGE_SIZE_4KB;
0446 prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
0447 prop->pmmu.last_mask = LAST_MASK;
0448
0449 prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
0450 prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
0451
0452
0453 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
0454 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
0455
0456 prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
0457 prop->cfg_size = CFG_SIZE;
0458 prop->max_asid = MAX_ASID;
0459 prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
0460 prop->high_pll = PLL_HIGH_DEFAULT;
0461 prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
0462 prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
0463 prop->max_power_default = MAX_POWER_DEFAULT;
0464 prop->dc_power_default = DC_POWER_DEFAULT;
0465 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
0466 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
0467 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
0468
0469 strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
0470 CARD_NAME_MAX_LEN);
0471
0472 prop->max_pending_cs = GOYA_MAX_PENDING_CS;
0473
0474 prop->first_available_user_interrupt = USHRT_MAX;
0475
0476 for (i = 0 ; i < HL_MAX_DCORES ; i++)
0477 prop->first_available_cq[i] = USHRT_MAX;
0478
0479 prop->fw_cpu_boot_dev_sts0_valid = false;
0480 prop->fw_cpu_boot_dev_sts1_valid = false;
0481 prop->hard_reset_done_by_fw = false;
0482 prop->gic_interrupts_enable = true;
0483
0484 prop->server_type = HL_SERVER_TYPE_UNKNOWN;
0485
0486 prop->clk_pll_index = HL_GOYA_MME_PLL;
0487
0488 prop->use_get_power_for_reset_history = true;
0489
0490 prop->configurable_stop_on_err = true;
0491
0492 prop->set_max_power_on_device_init = true;
0493
0494 prop->dma_mask = 48;
0495
0496 return 0;
0497 }
0498
0499
0500
0501
0502
0503
0504
0505
0506
0507
0508 static int goya_pci_bars_map(struct hl_device *hdev)
0509 {
0510 static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"};
0511 bool is_wc[3] = {false, false, true};
0512 int rc;
0513
0514 rc = hl_pci_bars_map(hdev, name, is_wc);
0515 if (rc)
0516 return rc;
0517
0518 hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
0519 (CFG_BASE - SRAM_BASE_ADDR);
0520
0521 return 0;
0522 }
0523
0524 static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
0525 {
0526 struct goya_device *goya = hdev->asic_specific;
0527 struct hl_inbound_pci_region pci_region;
0528 u64 old_addr = addr;
0529 int rc;
0530
0531 if ((goya) && (goya->ddr_bar_cur_addr == addr))
0532 return old_addr;
0533
0534
0535 pci_region.mode = PCI_BAR_MATCH_MODE;
0536 pci_region.bar = DDR_BAR_ID;
0537 pci_region.addr = addr;
0538 rc = hl_pci_set_inbound_region(hdev, 1, &pci_region);
0539 if (rc)
0540 return U64_MAX;
0541
0542 if (goya) {
0543 old_addr = goya->ddr_bar_cur_addr;
0544 goya->ddr_bar_cur_addr = addr;
0545 }
0546
0547 return old_addr;
0548 }
0549
0550
0551
0552
0553
0554
0555
0556
0557
0558 static int goya_init_iatu(struct hl_device *hdev)
0559 {
0560 struct hl_inbound_pci_region inbound_region;
0561 struct hl_outbound_pci_region outbound_region;
0562 int rc;
0563
0564 if (hdev->asic_prop.iatu_done_by_fw)
0565 return 0;
0566
0567
0568 inbound_region.mode = PCI_BAR_MATCH_MODE;
0569 inbound_region.bar = SRAM_CFG_BAR_ID;
0570 inbound_region.addr = SRAM_BASE_ADDR;
0571 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
0572 if (rc)
0573 goto done;
0574
0575
0576 inbound_region.mode = PCI_BAR_MATCH_MODE;
0577 inbound_region.bar = DDR_BAR_ID;
0578 inbound_region.addr = DRAM_PHYS_BASE;
0579 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
0580 if (rc)
0581 goto done;
0582
0583
0584 outbound_region.addr = HOST_PHYS_BASE;
0585 outbound_region.size = HOST_PHYS_SIZE;
0586 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
0587
0588 done:
0589 return rc;
0590 }
0591
/* Read the current H/W state from the device scratchpad register */
static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}
0596
0597
0598
0599
0600
0601
0602
0603
0604
0605
0606
0607
/*
 * goya_early_init() - early hardware/driver initialization.
 * @hdev: habanalabs device structure
 *
 * Sets fixed properties, verifies the PCI BAR sizes match the expected
 * Goya layout, determines whether firmware already configured the iATU,
 * initializes PCI, reads the preboot status, and resets the device if
 * its H/W state is dirty.
 *
 * Return: 0 on success, negative errno on failure.
 */
static int goya_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	resource_size_t pci_bar_size;
	u32 fw_boot_status, val;
	int rc;

	rc = goya_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get fixed properties\n");
		return rc;
	}

	/* Sanity-check BAR sizes: a mismatch means this is not a Goya */
	pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);

	if (pci_bar_size != CFG_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);

	if (pci_bar_size != MSIX_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, DDR_BAR_ID);

	/* With security enabled the driver cannot touch the iATU; firmware
	 * must have configured it, so skip the ELBI query below.
	 */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true, false);
	}

	/* Warn about a strap misconfiguration that can cause bus errors */
	if (!hdev->pldm) {
		val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
		if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
			dev_warn(hdev->dev,
				"PCI strap is not configured correctly, PCI bus errors may occur\n");
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}
0695
0696
0697
0698
0699
0700
0701
0702
0703
/*
 * goya_early_fini() - undo goya_early_init(): free the queue-properties
 * array and tear down PCI.
 *
 * Return: always 0.
 */
static int goya_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}
0711
/*
 * goya_mmu_prepare_reg() - program an ASID into one of the MMU-related
 * registers listed in goya_mmu_regs[]. Clears the low 11 bits of the
 * register, then ORs in @asid (so asid is assumed to fit in 11 bits —
 * consistent with MAX_ASID; confirm against the register spec).
 */
static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}
0718
0719 static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
0720 {
0721 struct goya_device *goya = hdev->asic_specific;
0722
0723 if (!(goya->hw_cap_initialized & HW_CAP_MMU))
0724 return;
0725
0726 if (secure)
0727 WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
0728 else
0729 WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);
0730
0731 RREG32(mmDMA_QM_0_GLBL_PROT);
0732 }
0733
0734
0735
0736
0737
0738
0739
/*
 * goya_fetch_psoc_frequency() - determine the PSOC timestamp frequency.
 * @hdev: habanalabs device structure
 *
 * With secured firmware the PLL registers are not accessible, so the
 * frequency is fetched from the device CPU (CPUCP); otherwise it is
 * computed from the PCI PLL registers:
 *   pll_clk = ref_clk * (nf + 1) / ((nr + 1) * (od + 1))
 * possibly divided further by (div_fctr + 1) depending on div_sel.
 *
 * Results are stored in the asic fixed properties; on any failure the
 * properties are left with the zero-initialized locals.
 */
static void goya_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if (hdev->asic_prop.fw_security_enabled) {
		struct goya_device *goya = hdev->asic_specific;

		/* CPUCP queries need the CPU queue to be up */
		if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
			return;

		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL,
				pll_freq_arr);

		if (rc)
			return;

		/* PLL output index 1 holds the frequency of interest */
		freq = pll_freq_arr[1];
	} else {
		div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
		div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
		nr = RREG32(mmPSOC_PCI_PLL_NR);
		nf = RREG32(mmPSOC_PCI_PLL_NF);
		od = RREG32(mmPSOC_PCI_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			/* Reference clock, optionally divided */
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			/* PLL output clock, optionally divided */
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev,
				"Received invalid div select value: %d",
				div_sel);
			freq = 0;
		}
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;
}
0795
0796
0797
0798
0799
0800
0801
0802
0803
0804
0805
0806
0807
0808
0809 int goya_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
0810 {
0811 struct goya_device *goya = hdev->asic_specific;
0812
0813 if ((goya->pm_mng_profile == PM_MANUAL) ||
0814 (goya->curr_pll_profile == freq))
0815 return 0;
0816
0817 dev_dbg(hdev->dev, "Changing device frequency to %s\n",
0818 freq == PLL_HIGH ? "high" : "low");
0819
0820 goya_set_pll_profile(hdev, freq);
0821
0822 goya->curr_pll_profile = freq;
0823
0824 return 1;
0825 }
0826
/*
 * goya_set_freq_to_low_job() - delayed-work handler that drops the
 * device frequency to the low profile when no compute context is
 * active, then re-arms itself. The fpriv_list_lock guards the
 * is_compute_ctx_active check against concurrent context open/close.
 */
static void goya_set_freq_to_low_job(struct work_struct *work)
{
	struct goya_work_freq *goya_work = container_of(work,
						struct goya_work_freq,
						work_freq.work);
	struct hl_device *hdev = goya_work->hdev;

	mutex_lock(&hdev->fpriv_list_lock);

	if (!hdev->is_compute_ctx_active)
		goya_set_frequency(hdev, PLL_LOW);

	mutex_unlock(&hdev->fpriv_list_lock);

	/* Re-arm: this job runs periodically for the life of the device */
	schedule_delayed_work(&goya_work->work_freq,
			usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
}
0844
/*
 * goya_late_init() - late device initialization, after H/W init.
 * @hdev: habanalabs device structure
 *
 * Fetches the PSOC frequency, prepares the MMU (clear page tables, set
 * DRAM default page, map the device-CPU region), brings up and tests
 * the CPU queues, reads CPUCP info, enables PCI access from the device
 * CPU, and starts the periodic low-frequency work.
 *
 * Return: 0 on success, negative errno on failure.
 */
int goya_late_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	int rc;

	goya_fetch_psoc_frequency(hdev);

	rc = goya_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to clear MMU page tables range %d\n", rc);
		return rc;
	}

	rc = goya_mmu_set_dram_default_page(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
		return rc;
	}

	rc = goya_mmu_add_mappings_for_device_cpu(hdev);
	if (rc)
		return rc;

	rc = goya_init_cpu_queues(hdev);
	if (rc)
		return rc;

	rc = goya_test_cpu_queue(hdev);
	if (rc)
		return rc;

	rc = goya_cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info %d\n", rc);
		return rc;
	}

	/* Now that we have the DRAM size in ASIC prop, we need to check
	 * its size and configure the DMA_IF DDR wrap protection (done via
	 * this register) accordingly.
	 */
	WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to enable PCI access from CPU %d\n", rc);
		return rc;
	}

	/* Start with low profile; the periodic job keeps it low while idle */
	goya->curr_pll_profile = PLL_LOW;

	goya->pm_mng_profile = PM_AUTO;

	goya_set_pll_profile(hdev, PLL_LOW);

	schedule_delayed_work(&goya->goya_work->work_freq,
		usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));

	return 0;
}
0909
0910
0911
0912
0913
0914
0915
0916
/*
 * goya_late_fini() - undo goya_late_init(): stop the frequency work and
 * free the hwmon channel-info array (NULL-terminated) built during init.
 */
void goya_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **channel_info_arr;
	struct goya_device *goya = hdev->asic_specific;
	int i = 0;

	cancel_delayed_work_sync(&goya->goya_work->work_freq);

	if (!hdev->hl_chip_info->info)
		return;

	channel_info_arr = hdev->hl_chip_info->info;

	/* The array and each entry (plus its config) were kmalloc'ed */
	while (channel_info_arr[i]) {
		kfree(channel_info_arr[i]->config);
		kfree(channel_info_arr[i]);
		i++;
	}

	kfree(channel_info_arr);

	hdev->hl_chip_info->info = NULL;
}
0940
/*
 * goya_set_pci_memory_regions() - describe how CFG, SRAM and DRAM are
 * reachable through the PCI BARs, for the generic access helpers.
 */
static void goya_set_pci_memory_regions(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_mem_region *region;

	/* CFG: shares a BAR with SRAM, at a fixed offset past it */
	region = &hdev->pci_mem_region[PCI_REGION_CFG];
	region->region_base = CFG_BASE;
	region->region_size = CFG_SIZE;
	region->offset_in_bar = CFG_BASE - SRAM_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = SRAM_CFG_BAR_ID;
	region->used = 1;

	/* SRAM: starts at offset 0 of the SRAM/CFG BAR */
	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
	region->region_base = SRAM_BASE_ADDR;
	region->region_size = SRAM_SIZE;
	region->offset_in_bar = 0;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = SRAM_CFG_BAR_ID;
	region->used = 1;

	/* DRAM: exposed through the (re-pointable) DDR BAR */
	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
	region->region_base = DRAM_PHYS_BASE;
	region->region_size = hdev->asic_prop.dram_size;
	region->offset_in_bar = 0;
	region->bar_size = prop->dram_pci_bar_size;
	region->bar_id = DDR_BAR_ID;
	region->used = 1;
}
0973
0974
0975
0976
0977
0978
0979
/*
 * goya_sw_init() - software-side initialization.
 * @hdev: habanalabs device structure
 *
 * Allocates the goya_device structure, the small-block DMA pool, the
 * CPU-accessible DMA memory and its gen_pool allocator, sets various
 * capability flags, and creates the frequency work item. On failure
 * everything allocated so far is released via the goto-cleanup chain.
 *
 * Return: 0 on success, negative errno on failure.
 */
static int goya_sw_init(struct hl_device *hdev)
{
	struct goya_device *goya;
	int rc;

	/* Allocate device structure */
	goya = kzalloc(sizeof(*goya), GFP_KERNEL);
	if (!goya)
		return -ENOMEM;

	/* according to goya_init_iatu */
	goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;

	goya->mme_clk = GOYA_PLL_FREQ_LOW;
	goya->tpc_clk = GOYA_PLL_FREQ_LOW;
	goya->ic_clk = GOYA_PLL_FREQ_LOW;

	hdev->asic_specific = goya;

	/* Pool for small (<= 256B) coherent DMA allocations, 8-byte aligned */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_goya_device;
	}

	hdev->cpu_accessible_dma_mem = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
							&hdev->cpu_accessible_dma_address,
							GFP_KERNEL | __GFP_ZERO);

	if (!hdev->cpu_accessible_dma_mem) {
		rc = -ENOMEM;
		goto free_dma_pool;
	}

	dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
		&hdev->cpu_accessible_dma_address);

	/* gen_pool sub-allocator over the CPU-accessible region, 32B quanta */
	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	spin_lock_init(&goya->hw_queues_lock);
	hdev->supports_coresight = true;
	hdev->asic_prop.supports_compute_reset = true;
	hdev->asic_prop.allow_inference_soft_reset = true;
	hdev->supports_wait_for_multi_cs = false;

	hdev->asic_funcs->set_pci_memory_regions(hdev);

	goya->goya_work = kmalloc(sizeof(struct goya_work_freq), GFP_KERNEL);
	if (!goya->goya_work) {
		rc = -ENOMEM;
		goto free_cpu_accessible_dma_pool;
	}

	goya->goya_work->hdev = hdev;
	INIT_DELAYED_WORK(&goya->goya_work->work_freq, goya_set_freq_to_low_job);

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_goya_device:
	kfree(goya);

	return rc;
}
1069
1070
1071
1072
1073
1074
1075
/*
 * goya_sw_fini() - undo goya_sw_init(), releasing resources in reverse
 * allocation order.
 *
 * Return: always 0.
 */
static int goya_sw_fini(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(goya->goya_work);
	kfree(goya);

	return 0;
}
1092
/*
 * goya_init_dma_qman() - program one DMA QMAN.
 * @dma_id: index of the DMA channel (0-4); used to derive the register
 *          offset from the QMAN-0 register block
 * @bus_address: DMA address of the queue's PQ buffer
 *
 * Sets the PQ base/size, sync-manager message bases, GIC error
 * reporting, arbitration limits, protection mode (depends on whether
 * the MMU is up) and finally enables the QMAN.
 */
static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
				dma_addr_t bus_address)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	/* per-channel register stride, derived from consecutive QM blocks */
	u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);
	u32 dma_err_cfg = QMAN_DMA_ERR_MSG_EN;

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
	WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));

	WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
	WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);

	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);

	/* PQ has buffer of 2 cache lines, while CQ has 8 lines */
	WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
	WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);

	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA_QM_0_GLBL_ERR_CFG_DMA_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, dma_err_cfg);
	WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
}
1144
/*
 * goya_init_dma_ch - Program one DMA channel's error reporting and
 * write-completion target.
 * @hdev: habanalabs device structure.
 * @dma_id: index of the DMA channel to configure.
 *
 * Channel errors raise an interrupt through the GIC doorbell with the
 * channel's async event ID. Write completions increment a sync object;
 * channel 0 uses a dedicated SOB (1007) while channels 1..N use
 * consecutive SOBs starting at 1000.
 */
static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
{
	u32 gic_base_lo, gic_base_hi;
	u64 sob_addr;
	/* Per-channel stride between consecutive DMA CH register blocks */
	u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id);

	/* Channels 1..N get SOB_OBJ_1000 onward; channel 0 uses SOB_OBJ_1007 */
	if (dma_id)
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
				(dma_id - 1) * 4;
	else
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;

	/* NOTE(review): only the high half of the completion address is
	 * written here - presumably the low half is set elsewhere per
	 * transaction; confirm against the rest of the driver.
	 */
	WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr));
	WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
}
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179 void goya_init_dma_qmans(struct hl_device *hdev)
1180 {
1181 struct goya_device *goya = hdev->asic_specific;
1182 struct hl_hw_queue *q;
1183 int i;
1184
1185 if (goya->hw_cap_initialized & HW_CAP_DMA)
1186 return;
1187
1188 q = &hdev->kernel_queues[0];
1189
1190 for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
1191 q->cq_id = q->msi_vec = i;
1192 goya_init_dma_qman(hdev, i, q->bus_address);
1193 goya_init_dma_ch(hdev, i);
1194 }
1195
1196 goya->hw_cap_initialized |= HW_CAP_DMA;
1197 }
1198
1199
1200
1201
1202
1203
1204
1205 static void goya_disable_external_queues(struct hl_device *hdev)
1206 {
1207 struct goya_device *goya = hdev->asic_specific;
1208
1209 if (!(goya->hw_cap_initialized & HW_CAP_DMA))
1210 return;
1211
1212 WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
1213 WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
1214 WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
1215 WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
1216 WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
1217 }
1218
/*
 * goya_stop_queue - Stop a single queue manager's command parser.
 * @hdev: habanalabs device structure.
 * @cfg_reg: QMAN GLBL_CFG1 register (written to request CP stop).
 * @cp_sts_reg: QMAN CP_STS register (polled for fence-in-progress).
 * @glbl_sts0_reg: QMAN GLBL_STS0 register (polled for CP-is-stopped).
 *
 * The TPC0 shift/mask macros are used for all QMAN types; the bit
 * layout of these registers is shared across engines.
 *
 * Return: 0 on success or when the QMAN is stuck waiting on a fence,
 * -EINVAL if the CP did not report stopped within the timeout.
 */
static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
				u32 cp_sts_reg, u32 glbl_sts0_reg)
{
	int rc;
	u32 status;

	/* Request the command parser to stop */
	WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	status = RREG32(cp_sts_reg);
	if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
		rc = hl_poll_timeout(
			hdev,
			cp_sts_reg,
			status,
			!(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
			1000,
			QMAN_FENCE_TIMEOUT_USEC);

		/* If the fence never clears, the QMAN is stuck waiting on
		 * a fence; there is no point polling for CP stop, so report
		 * success (this is intentional, not an error path).
		 */
		if (rc)
			return 0;
	}

	rc = hl_poll_timeout(
		hdev,
		glbl_sts0_reg,
		status,
		(status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
		1000,
		QMAN_STOP_TIMEOUT_USEC);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for QMAN to stop\n");
		return -EINVAL;
	}

	return 0;
}
1260
1261
1262
1263
1264
1265
1266
1267
1268
/*
 * goya_stop_external_queues - Stop all five external DMA QMANs.
 * @hdev: habanalabs device structure.
 *
 * Attempts to stop every engine even if an earlier one fails, so that
 * as many QMANs as possible are quiesced.
 *
 * Return: 0 on success, -EIO if any QMAN failed to stop.
 */
static int goya_stop_external_queues(struct hl_device *hdev)
{
	int rc, retval = 0;

	struct goya_device *goya = hdev->asic_specific;

	/* Nothing to stop if the DMA engines were never enabled */
	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return retval;

	rc = goya_stop_queue(hdev,
			mmDMA_QM_0_GLBL_CFG1,
			mmDMA_QM_0_CP_STS,
			mmDMA_QM_0_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 0\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_1_GLBL_CFG1,
			mmDMA_QM_1_CP_STS,
			mmDMA_QM_1_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 1\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_2_GLBL_CFG1,
			mmDMA_QM_2_CP_STS,
			mmDMA_QM_2_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 2\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_3_GLBL_CFG1,
			mmDMA_QM_3_CP_STS,
			mmDMA_QM_3_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 3\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_4_GLBL_CFG1,
			mmDMA_QM_4_CP_STS,
			mmDMA_QM_4_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 4\n");
		retval = -EIO;
	}

	return retval;
}
1330
1331
1332
1333
1334
1335
1336
1337
1338
/*
 * goya_init_cpu_queues - Set up the communication channel with the
 * on-device CPU (PQ, event queue and CPU-accessible CQ).
 * @hdev: habanalabs device structure.
 *
 * Publishes the host-side queue addresses/sizes to the device CPU via
 * scratchpad registers, signals readiness, rings the GIC doorbell and
 * then polls for the device CPU's acknowledgment. The register-write
 * order here is part of the host/firmware handshake protocol.
 *
 * Return: 0 on success (or if CPU queues are disabled / already set up),
 * -EIO if the device CPU did not acknowledge within the timeout.
 */
int goya_init_cpu_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_eq *eq;
	u32 status;
	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	/* Idempotent - only run the handshake once */
	if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	/* Publish the host-resident PQ and EQ bus addresses */
	WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	/* The CQ lives in the device-virtual CPU-accessible region */
	WREG32(mmCPU_CQ_BASE_ADDR_LOW,
			lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
	WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
			upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));

	WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_EQ_CI, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	/* Tell the device CPU the host side is ready */
	WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);

	/* Ring the doorbell so the device CPU processes the update */
	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_PI_UPDATE);

	/* Wait for the device CPU to acknowledge the handshake */
	err = hl_poll_timeout(
		hdev,
		mmCPU_PQ_INIT_STATUS,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		GOYA_CPU_TIMEOUT_USEC);

	if (err) {
		dev_err(hdev->dev,
			"Failed to setup communication with device CPU\n");
		return -EIO;
	}

	/* Snapshot firmware app boot status registers if they are valid */
	if (prop->fw_cpu_boot_dev_sts0_valid)
		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);

	if (prop->fw_cpu_boot_dev_sts1_valid)
		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);

	goya->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}
1405
1406 static void goya_set_pll_refclk(struct hl_device *hdev)
1407 {
1408 WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
1409 WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
1410 WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
1411 WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);
1412
1413 WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
1414 WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
1415 WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
1416 WREG32(mmIC_PLL_DIV_SEL_3, 0x0);
1417
1418 WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
1419 WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
1420 WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
1421 WREG32(mmMC_PLL_DIV_SEL_3, 0x0);
1422
1423 WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
1424 WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
1425 WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
1426 WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);
1427
1428 WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
1429 WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
1430 WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
1431 WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);
1432
1433 WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
1434 WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
1435 WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
1436 WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);
1437
1438 WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
1439 WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
1440 WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
1441 WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
1442 }
1443
/*
 * goya_disable_clk_rlx - Disable clock relaxation on the PSOC MME and
 * IC PLLs by writing their CLK_RLX_0 registers.
 * @hdev: habanalabs device structure.
 */
static void goya_disable_clk_rlx(struct hl_device *hdev)
{
	WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
	WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
}
1449
/*
 * _goya_tpc_mbist_workaround - Run the MBIST workaround on one TPC.
 * @hdev: habanalabs device structure.
 * @tpc_id: index of the TPC to operate on.
 *
 * Sequence (order-critical, timeouts are reported but not fatal):
 *   1. Check that MBIST is not already active, program the MBIST
 *      pattern and memory-select registers, then start MBIST and poll
 *      for completion.
 *   2. Pulse the TPC core reset bit through the EML debug register,
 *      with a settle delay on both edges.
 *   3. Zero the first 256 words of the TPC's SLM and read it back once.
 */
static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
{
	u64 tpc_eml_address;
	u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
	int err, slm_index;

	/* Per-TPC strides of the CFG and EML register blocks */
	tpc_offset = tpc_id * 0x40000;
	tpc_eml_offset = tpc_id * 0x200000;
	tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
	/* SLM sits 1MB above the EML block */
	tpc_slm_offset = tpc_eml_address + 0x100000;

	/* MBIST should not be running at this point; warn if it is */
	val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
	if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
		dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
				tpc_id);

	WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);

	/* Select which memories MBIST will exercise */
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);

	/* Kick off MBIST */
	WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
		1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);

	err = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
		val,
		(val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
		1000,
		HL_DEVICE_TIMEOUT_USEC);

	if (err)
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);

	/* Assert TPC core reset */
	WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
		1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);

	msleep(GOYA_RESET_WAIT_MSEC);

	/* De-assert TPC core reset */
	WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
		~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));

	msleep(GOYA_RESET_WAIT_MSEC);

	/* Zero the first 256 32-bit words of the SLM */
	for (slm_index = 0 ; slm_index < 256 ; slm_index++)
		WREG32(tpc_slm_offset + (slm_index << 2), 0);

	/* Dummy read-back to make sure the writes reached the SLM */
	val = RREG32(tpc_slm_offset);
}
1514
1515 static void goya_tpc_mbist_workaround(struct hl_device *hdev)
1516 {
1517 struct goya_device *goya = hdev->asic_specific;
1518 int i;
1519
1520 if (hdev->pldm)
1521 return;
1522
1523 if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST)
1524 return;
1525
1526
1527
1528 for (i = 0 ; i < TPC_MAX_NUM ; i++)
1529 _goya_tpc_mbist_workaround(hdev, i);
1530
1531 goya->hw_cap_initialized |= HW_CAP_TPC_MBIST;
1532 }
1533
1534
1535
1536
1537
1538
1539
1540
1541
/*
 * goya_init_golden_registers - Program the ASIC's "golden" register
 * values: router arbitration weights, split coefficients, scrambling
 * and interrupt masks.
 * @hdev: habanalabs device structure.
 *
 * The numeric values are hardware tuning constants supplied by the
 * ASIC team and must not be changed. Runs at most once per hard reset
 * (guarded by HW_CAP_GOLDEN).
 */
static void goya_init_golden_registers(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 polynom[10], tpc_intr_mask, offset;
	int i;

	if (goya->hw_cap_initialized & HW_CAP_GOLDEN)
		return;

	/* Split-coefficient polynom table (shifted by 7 when programmed) */
	polynom[0] = 0x00020080;
	polynom[1] = 0x00401000;
	polynom[2] = 0x00200800;
	polynom[3] = 0x00002000;
	polynom[4] = 0x00080200;
	polynom[5] = 0x00040100;
	polynom[6] = 0x00100400;
	polynom[7] = 0x00004000;
	polynom[8] = 0x00010000;
	polynom[9] = 0x00008000;

	/* Mask all TPC interrupts */
	tpc_intr_mask = 0x7FFF;

	/* SRAM router arbitration - 6 rows, 0x20000 apart */
	for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) {
		WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101);
	}

	/* MME credits and error-interrupt mask */
	WREG32(mmMME_STORE_MAX_CREDIT, 0x21);
	WREG32(mmMME_AGU, 0x0f0f0f10);
	WREG32(mmMME_SEI_MASK, ~0x0);

	/* MME router arbitration weights (per direction, per router) */
	WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
	WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
	WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
	WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701);
	WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401);
	WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401);
	WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301);
	WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
	WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105);
	WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
	WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
	WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301);
	WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401);
	WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202);
	WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101);
	WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201);
	WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701);
	WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101);
	WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701);
	WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201);
	WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102);
	WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
	WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
	WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707);
	WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201);
	WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
	WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
	WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102);
	WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102);
	WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102);
	WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102);
	WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102);
	WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107);
	WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106);
	WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102);
	WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102);
	WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102);
	WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102);
	WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102);
	WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702);
	WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702);
	WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602);
	WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402);
	WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202);
	WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102);
	WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
	WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
	WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
	WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
	WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107);
	WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107);
	WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
	WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
	WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501);
	WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501);
	WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301);
	WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401);
	WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101);

	/* TPC1 router arbitration weights */
	WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202);
	WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201);
	WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201);
	WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202);
	WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
	WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101);

	/* TPC2 router arbitration weights */
	WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
	WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101);
	WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202);
	WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201);
	WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201);
	WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202);
	WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
	WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101);

	/* TPC3 router arbitration weights */
	WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
	WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
	WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301);
	WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201);
	WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201);
	WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402);
	WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
	WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
	WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401);

	/* TPC4 router arbitration weights */
	WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
	WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702);
	WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201);
	WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201);
	WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602);
	WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
	WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301);

	/* TPC5 router arbitration weights */
	WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101);
	WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101);
	WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602);
	WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201);
	WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201);
	WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702);
	WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
	WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501);

	/* TPC6 router arbitration weights */
	WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702);
	WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702);
	WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
	WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501);

	/* Program the split coefficients into every router */
	for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) {
		WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);

		WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);

		WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
	}

	/* Enable scrambling on all MME routers (0x40000 apart) */
	for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) {
		WREG32(mmMME1_RTR_SCRAMB_EN + offset,
				1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT);
		WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset,
				1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT);
	}

	/* Per-TPC: mask interrupts, enable scrambling, set icache fetch */
	for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) {
		WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask);

		WREG32(mmTPC0_NRTR_SCRAMB_EN + offset,
				1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
		WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
				1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);

		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset,
				ICACHE_FETCH_LINE_NUM, 2);
	}

	/* Scrambling for the DMA and PCI network routers */
	WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
	WREG32(mmDMA_NRTR_NON_LIN_SCRAMB,
			1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT);

	WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT);
	WREG32(mmPCI_NRTR_NON_LIN_SCRAMB,
			1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT);

	/* DMA channel 1 golden configuration */
	WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);

	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);

	goya->hw_cap_initialized |= HW_CAP_GOLDEN;
}
1836
/*
 * goya_init_mme_qman - Program the MME queue manager.
 * @hdev: habanalabs device structure.
 *
 * The MME PQ is an internal queue that resides in on-chip SRAM (at
 * MME_QMAN_BASE_OFFSET), unlike the host-resident external DMA queues.
 * The QMAN is enabled last, after message bases, LDMA offsets and error
 * reporting are configured.
 */
static void goya_init_mme_qman(struct hl_device *hdev)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u64 qman_base_addr;

	/* Sync manager monitor payload / sync object base addresses */
	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	/* GIC doorbell used to raise an interrupt on QMAN error */
	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	/* The MME PQ lives in SRAM */
	qman_base_addr = hdev->asic_prop.sram_base_address +
			MME_QMAN_BASE_OFFSET;

	WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
	WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
	WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
	WREG32(mmMME_QM_PQ_PI, 0);
	WREG32(mmMME_QM_PQ_CI, 0);
	/* LDMA register offsets used by the CP when executing LIN_DMA */
	WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
	WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
	WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
	WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);

	WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
	WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
	WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
	WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);

	/* CQ credit configuration */
	WREG32(mmMME_QM_CQ_CFG1, 0x00080008);

	/* On error, write the MME QM async event ID to the GIC doorbell */
	WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
	WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);

	WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);

	WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);

	WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);

	/* Enable the QMAN only after everything above is programmed */
	WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
}
1886
/*
 * goya_init_mme_cmdq - Program the MME command queue.
 * @hdev: habanalabs device structure.
 *
 * Unlike the MME QMAN, the CMDQ has no PQ of its own - it is fed by
 * the QMAN - so only message bases, CQ credits and error reporting are
 * configured before it is enabled.
 */
static void goya_init_mme_cmdq(struct hl_device *hdev)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;

	/* Sync manager monitor payload / sync object base addresses */
	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	/* GIC doorbell used to raise an interrupt on CMDQ error */
	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);

	/* CQ credit configuration */
	WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);

	WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
	WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);

	WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);

	WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);

	WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);

	/* Enable the CMDQ only after everything above is programmed */
	WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
}
1922
1923 void goya_init_mme_qmans(struct hl_device *hdev)
1924 {
1925 struct goya_device *goya = hdev->asic_specific;
1926 u32 so_base_lo, so_base_hi;
1927
1928 if (goya->hw_cap_initialized & HW_CAP_MME)
1929 return;
1930
1931 so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1932 so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1933
1934 WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
1935 WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);
1936
1937 goya_init_mme_qman(hdev);
1938 goya_init_mme_cmdq(hdev);
1939
1940 goya->hw_cap_initialized |= HW_CAP_MME;
1941 }
1942
/*
 * goya_init_tpc_qman - Program one TPC queue manager.
 * @hdev: habanalabs device structure.
 * @base_off: offset of this TPC's PQ ring inside SRAM.
 * @tpc_id: index of the TPC (0-7).
 *
 * TPC QMAN register blocks are evenly spaced, so TPC N is addressed by
 * adding N times the TPC0->TPC1 register stride. The PQ is an internal
 * queue in SRAM. The QMAN is enabled as the last step.
 */
static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u64 qman_base_addr;
	/* Per-TPC stride between consecutive TPC QMAN register blocks */
	u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);

	/* Sync manager monitor payload / sync object base addresses */
	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	/* GIC doorbell used to raise an interrupt on QMAN error */
	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	/* This TPC's PQ lives in SRAM at the caller-supplied offset */
	qman_base_addr = hdev->asic_prop.sram_base_address + base_off;

	WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
	WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
	WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
	WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
	WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
	/* LDMA register offsets used by the CP when executing LIN_DMA */
	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
	WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
	WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);

	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);

	/* CQ credit configuration */
	WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);

	/* On error, write this TPC's async event ID to the GIC doorbell */
	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);

	WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);

	WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);

	WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);

	/* Enable the QMAN only after everything above is programmed */
	WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
}
1992
/*
 * goya_init_tpc_cmdq - Program one TPC command queue.
 * @hdev: habanalabs device structure.
 * @tpc_id: index of the TPC (0-7).
 *
 * The CMDQ is fed by its TPC's QMAN and has no PQ of its own; only
 * message bases, CQ credits and error reporting are configured before
 * it is enabled.
 */
static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	/* Per-TPC stride between consecutive TPC CMDQ register blocks */
	u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);

	/* Sync manager monitor payload / sync object base addresses */
	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	/* GIC doorbell used to raise an interrupt on CMDQ error */
	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);

	/* CQ credit configuration */
	WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);

	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);

	WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);

	WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);

	WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);

	/* Enable the CMDQ only after everything above is programmed */
	WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
}
2029
2030 void goya_init_tpc_qmans(struct hl_device *hdev)
2031 {
2032 struct goya_device *goya = hdev->asic_specific;
2033 u32 so_base_lo, so_base_hi;
2034 u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW -
2035 mmTPC0_CFG_SM_BASE_ADDRESS_LOW;
2036 int i;
2037
2038 if (goya->hw_cap_initialized & HW_CAP_TPC)
2039 return;
2040
2041 so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
2042 so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
2043
2044 for (i = 0 ; i < TPC_MAX_NUM ; i++) {
2045 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off,
2046 so_base_lo);
2047 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off,
2048 so_base_hi);
2049 }
2050
2051 goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0);
2052 goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1);
2053 goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2);
2054 goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3);
2055 goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4);
2056 goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5);
2057 goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6);
2058 goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7);
2059
2060 for (i = 0 ; i < TPC_MAX_NUM ; i++)
2061 goya_init_tpc_cmdq(hdev, i);
2062
2063 goya->hw_cap_initialized |= HW_CAP_TPC;
2064 }
2065
2066
2067
2068
2069
2070
2071
2072 static void goya_disable_internal_queues(struct hl_device *hdev)
2073 {
2074 struct goya_device *goya = hdev->asic_specific;
2075
2076 if (!(goya->hw_cap_initialized & HW_CAP_MME))
2077 goto disable_tpc;
2078
2079 WREG32(mmMME_QM_GLBL_CFG0, 0);
2080 WREG32(mmMME_CMDQ_GLBL_CFG0, 0);
2081
2082 disable_tpc:
2083 if (!(goya->hw_cap_initialized & HW_CAP_TPC))
2084 return;
2085
2086 WREG32(mmTPC0_QM_GLBL_CFG0, 0);
2087 WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);
2088
2089 WREG32(mmTPC1_QM_GLBL_CFG0, 0);
2090 WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0);
2091
2092 WREG32(mmTPC2_QM_GLBL_CFG0, 0);
2093 WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0);
2094
2095 WREG32(mmTPC3_QM_GLBL_CFG0, 0);
2096 WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0);
2097
2098 WREG32(mmTPC4_QM_GLBL_CFG0, 0);
2099 WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0);
2100
2101 WREG32(mmTPC5_QM_GLBL_CFG0, 0);
2102 WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0);
2103
2104 WREG32(mmTPC6_QM_GLBL_CFG0, 0);
2105 WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0);
2106
2107 WREG32(mmTPC7_QM_GLBL_CFG0, 0);
2108 WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0);
2109 }
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119 static int goya_stop_internal_queues(struct hl_device *hdev)
2120 {
2121 struct goya_device *goya = hdev->asic_specific;
2122 int rc, retval = 0;
2123
2124 if (!(goya->hw_cap_initialized & HW_CAP_MME))
2125 goto stop_tpc;
2126
2127
2128
2129
2130
2131
2132
2133 rc = goya_stop_queue(hdev,
2134 mmMME_QM_GLBL_CFG1,
2135 mmMME_QM_CP_STS,
2136 mmMME_QM_GLBL_STS0);
2137
2138 if (rc) {
2139 dev_err(hdev->dev, "failed to stop MME QMAN\n");
2140 retval = -EIO;
2141 }
2142
2143 rc = goya_stop_queue(hdev,
2144 mmMME_CMDQ_GLBL_CFG1,
2145 mmMME_CMDQ_CP_STS,
2146 mmMME_CMDQ_GLBL_STS0);
2147
2148 if (rc) {
2149 dev_err(hdev->dev, "failed to stop MME CMDQ\n");
2150 retval = -EIO;
2151 }
2152
2153 stop_tpc:
2154 if (!(goya->hw_cap_initialized & HW_CAP_TPC))
2155 return retval;
2156
2157 rc = goya_stop_queue(hdev,
2158 mmTPC0_QM_GLBL_CFG1,
2159 mmTPC0_QM_CP_STS,
2160 mmTPC0_QM_GLBL_STS0);
2161
2162 if (rc) {
2163 dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n");
2164 retval = -EIO;
2165 }
2166
2167 rc = goya_stop_queue(hdev,
2168 mmTPC0_CMDQ_GLBL_CFG1,
2169 mmTPC0_CMDQ_CP_STS,
2170 mmTPC0_CMDQ_GLBL_STS0);
2171
2172 if (rc) {
2173 dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n");
2174 retval = -EIO;
2175 }
2176
2177 rc = goya_stop_queue(hdev,
2178 mmTPC1_QM_GLBL_CFG1,
2179 mmTPC1_QM_CP_STS,
2180 mmTPC1_QM_GLBL_STS0);
2181
2182 if (rc) {
2183 dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n");
2184 retval = -EIO;
2185 }
2186
2187 rc = goya_stop_queue(hdev,
2188 mmTPC1_CMDQ_GLBL_CFG1,
2189 mmTPC1_CMDQ_CP_STS,
2190 mmTPC1_CMDQ_GLBL_STS0);
2191
2192 if (rc) {
2193 dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n");
2194 retval = -EIO;
2195 }
2196
2197 rc = goya_stop_queue(hdev,
2198 mmTPC2_QM_GLBL_CFG1,
2199 mmTPC2_QM_CP_STS,
2200 mmTPC2_QM_GLBL_STS0);
2201
2202 if (rc) {
2203 dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n");
2204 retval = -EIO;
2205 }
2206
2207 rc = goya_stop_queue(hdev,
2208 mmTPC2_CMDQ_GLBL_CFG1,
2209 mmTPC2_CMDQ_CP_STS,
2210 mmTPC2_CMDQ_GLBL_STS0);
2211
2212 if (rc) {
2213 dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n");
2214 retval = -EIO;
2215 }
2216
2217 rc = goya_stop_queue(hdev,
2218 mmTPC3_QM_GLBL_CFG1,
2219 mmTPC3_QM_CP_STS,
2220 mmTPC3_QM_GLBL_STS0);
2221
2222 if (rc) {
2223 dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n");
2224 retval = -EIO;
2225 }
2226
2227 rc = goya_stop_queue(hdev,
2228 mmTPC3_CMDQ_GLBL_CFG1,
2229 mmTPC3_CMDQ_CP_STS,
2230 mmTPC3_CMDQ_GLBL_STS0);
2231
2232 if (rc) {
2233 dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n");
2234 retval = -EIO;
2235 }
2236
2237 rc = goya_stop_queue(hdev,
2238 mmTPC4_QM_GLBL_CFG1,
2239 mmTPC4_QM_CP_STS,
2240 mmTPC4_QM_GLBL_STS0);
2241
2242 if (rc) {
2243 dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n");
2244 retval = -EIO;
2245 }
2246
2247 rc = goya_stop_queue(hdev,
2248 mmTPC4_CMDQ_GLBL_CFG1,
2249 mmTPC4_CMDQ_CP_STS,
2250 mmTPC4_CMDQ_GLBL_STS0);
2251
2252 if (rc) {
2253 dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n");
2254 retval = -EIO;
2255 }
2256
2257 rc = goya_stop_queue(hdev,
2258 mmTPC5_QM_GLBL_CFG1,
2259 mmTPC5_QM_CP_STS,
2260 mmTPC5_QM_GLBL_STS0);
2261
2262 if (rc) {
2263 dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n");
2264 retval = -EIO;
2265 }
2266
2267 rc = goya_stop_queue(hdev,
2268 mmTPC5_CMDQ_GLBL_CFG1,
2269 mmTPC5_CMDQ_CP_STS,
2270 mmTPC5_CMDQ_GLBL_STS0);
2271
2272 if (rc) {
2273 dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n");
2274 retval = -EIO;
2275 }
2276
2277 rc = goya_stop_queue(hdev,
2278 mmTPC6_QM_GLBL_CFG1,
2279 mmTPC6_QM_CP_STS,
2280 mmTPC6_QM_GLBL_STS0);
2281
2282 if (rc) {
2283 dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n");
2284 retval = -EIO;
2285 }
2286
2287 rc = goya_stop_queue(hdev,
2288 mmTPC6_CMDQ_GLBL_CFG1,
2289 mmTPC6_CMDQ_CP_STS,
2290 mmTPC6_CMDQ_GLBL_STS0);
2291
2292 if (rc) {
2293 dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n");
2294 retval = -EIO;
2295 }
2296
2297 rc = goya_stop_queue(hdev,
2298 mmTPC7_QM_GLBL_CFG1,
2299 mmTPC7_QM_CP_STS,
2300 mmTPC7_QM_GLBL_STS0);
2301
2302 if (rc) {
2303 dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n");
2304 retval = -EIO;
2305 }
2306
2307 rc = goya_stop_queue(hdev,
2308 mmTPC7_CMDQ_GLBL_CFG1,
2309 mmTPC7_CMDQ_CP_STS,
2310 mmTPC7_CMDQ_GLBL_STS0);
2311
2312 if (rc) {
2313 dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n");
2314 retval = -EIO;
2315 }
2316
2317 return retval;
2318 }
2319
/*
 * goya_dma_stall - stall all five external DMA QMAN engines.
 * @hdev: habanalabs device structure.
 *
 * Sets the DMA_STOP bit in each DMA QMAN's GLBL_CFG1 register. No-op if
 * the DMA engines were never initialized.
 */
static void goya_dma_stall(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return;

	WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
}
2333
/*
 * goya_tpc_stall - stall all eight TPC engines.
 * @hdev: habanalabs device structure.
 *
 * Sets the STALL bit in each TPC's CFG_TPC_STALL register. No-op if the
 * TPC engines were never initialized.
 */
static void goya_tpc_stall(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_TPC))
		return;

	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
}
2350
/*
 * goya_mme_stall - stall the MME engine.
 * @hdev: habanalabs device structure.
 *
 * Writes all-ones to the MME stall register. No-op if the MME was never
 * initialized.
 */
static void goya_mme_stall(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_MME))
		return;

	WREG32(mmMME_STALL, 0xFFFFFFFF);
}
2360
/*
 * goya_enable_msix - allocate MSI-X vectors and request their IRQs.
 * @hdev: habanalabs device structure.
 *
 * Allocates exactly GOYA_MSIX_ENTRIES vectors, then requests one IRQ per
 * completion queue plus a dedicated IRQ for the event queue. On any
 * request_irq() failure, the already-requested CQ IRQs (tracked by
 * irq_cnt_init) are released and the vectors are freed.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_enable_msix(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int cq_cnt = hdev->asic_prop.completion_queues_count;
	int rc, i, irq_cnt_init, irq;

	if (goya->hw_cap_initialized & HW_CAP_MSIX)
		return 0;

	/* min == max: either all vectors are granted or the call fails */
	rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
				GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
	if (rc < 0) {
		dev_err(hdev->dev,
			"MSI-X: Failed to enable support -- %d/%d\n",
			GOYA_MSIX_ENTRIES, rc);
		return rc;
	}

	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
		irq = pci_irq_vector(hdev->pdev, i);
		rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
				&hdev->completion_queue[i]);
		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
			goto free_irqs;
		}
	}

	/* The event queue gets its own dedicated vector */
	irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);

	rc = request_irq(irq, hl_irq_handler_eq, 0,
			goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
			&hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
		goto free_irqs;
	}

	goya->hw_cap_initialized |= HW_CAP_MSIX;
	return 0;

free_irqs:
	/* Only the IRQs that were successfully requested are freed */
	for (i = 0 ; i < irq_cnt_init ; i++)
		free_irq(pci_irq_vector(hdev->pdev, i),
			&hdev->completion_queue[i]);

	pci_free_irq_vectors(hdev->pdev);
	return rc;
}
2410
2411 static void goya_sync_irqs(struct hl_device *hdev)
2412 {
2413 struct goya_device *goya = hdev->asic_specific;
2414 int i;
2415
2416 if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2417 return;
2418
2419
2420 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
2421 synchronize_irq(pci_irq_vector(hdev->pdev, i));
2422
2423 synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX));
2424 }
2425
/*
 * goya_disable_msix - release all IRQs and free the MSI-X vectors.
 * @hdev: habanalabs device structure.
 *
 * Waits for in-flight handlers to finish before freeing, then clears the
 * MSIX capability bit. No-op when MSI-X was never enabled.
 */
static void goya_disable_msix(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int i, irq;

	if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
		return;

	goya_sync_irqs(hdev);

	irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
	free_irq(irq, &hdev->event_queue);

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
		irq = pci_irq_vector(hdev->pdev, i);
		free_irq(irq, &hdev->completion_queue[i]);
	}

	pci_free_irq_vectors(hdev->pdev);

	goya->hw_cap_initialized &= ~HW_CAP_MSIX;
}
2448
/*
 * goya_enable_timestamp - reset and start the PSOC timestamp counter.
 * @hdev: habanalabs device structure.
 */
static void goya_enable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);

	/* Zero the two 32-bit halves of the counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);

	/* Re-enable the counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
}
2461
/*
 * goya_disable_timestamp - stop the PSOC timestamp counter.
 * @hdev: habanalabs device structure.
 */
static void goya_disable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
}
2467
/*
 * goya_halt_engines - stop and disable all compute and DMA engines.
 * @hdev: habanalabs device structure.
 * @hard_reset: true when this halt is part of a hard-reset flow; also
 *              tears down MSI-X and the device-CPU MMU mappings.
 * @fw_reset: unused on Goya (present for the common callback signature).
 *
 * Ordering matters here: queues are stopped first, engines stalled next,
 * and queues disabled last, with a settle delay between the stages.
 */
static void goya_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	u32 wait_timeout_ms;

	if (hdev->pldm)
		wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
	else
		wait_timeout_ms = GOYA_RESET_WAIT_MSEC;

	goya_stop_external_queues(hdev);
	goya_stop_internal_queues(hdev);

	msleep(wait_timeout_ms);

	goya_dma_stall(hdev);
	goya_tpc_stall(hdev);
	goya_mme_stall(hdev);

	msleep(wait_timeout_ms);

	goya_disable_external_queues(hdev);
	goya_disable_internal_queues(hdev);

	goya_disable_timestamp(hdev);

	if (hard_reset) {
		goya_disable_msix(hdev);
		goya_mmu_remove_device_cpu_mappings(hdev);
	} else {
		/* Soft reset keeps MSI-X; just drain in-flight handlers */
		goya_sync_irqs(hdev);
	}
}
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509 static int goya_load_firmware_to_device(struct hl_device *hdev)
2510 {
2511 void __iomem *dst;
2512
2513 dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
2514
2515 return hl_fw_load_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst, 0, 0);
2516 }
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526 static int goya_load_boot_fit_to_device(struct hl_device *hdev)
2527 {
2528 void __iomem *dst;
2529
2530 dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2531
2532 return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst, 0, 0);
2533 }
2534
/*
 * goya_init_dynamic_firmware_loader - set up the dynamic FW-load manager.
 * @hdev: habanalabs device structure.
 *
 * Seeds the communication descriptor's dynamic registers with their
 * initial values (before the first descriptor is read from FW, these
 * registers have no valid content) and sets the bootloader wait timeout.
 */
static void goya_init_dynamic_firmware_loader(struct hl_device *hdev)
{
	struct dynamic_fw_load_mgr *dynamic_loader;
	struct cpu_dyn_regs *dyn_regs;

	dynamic_loader = &hdev->fw_loader.dynamic_loader;

	/*
	 * Values are stored little-endian in the descriptor, hence the
	 * cpu_to_le32() conversions.
	 */
	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
	dyn_regs->kmd_msg_to_cpu =
		cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
	dyn_regs->cpu_cmd_status_to_host =
		cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);

	dynamic_loader->wait_for_bl_timeout = GOYA_WAIT_FOR_BL_TIMEOUT_USEC;
}
2557
/*
 * goya_init_static_firmware_loader - set up the static FW-load manager.
 * @hdev: habanalabs device structure.
 *
 * Records the fixed register addresses and SRAM offsets used by the
 * static (legacy) firmware-loading protocol.
 */
static void goya_init_static_firmware_loader(struct hl_device *hdev)
{
	struct static_fw_load_mgr *static_loader;

	static_loader = &hdev->fw_loader.static_loader;

	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
	/* Mask used to turn an absolute SRAM address into a BAR offset */
	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
}
2577
/*
 * goya_init_firmware_preload_params - set registers polled before FW load.
 * @hdev: habanalabs device structure.
 *
 * Fills the pre-FW-load properties with the boot-status and boot-error
 * register addresses and the preboot wait timeout.
 */
static void goya_init_firmware_preload_params(struct hl_device *hdev)
{
	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;

	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
	pre_fw_load->wait_for_preboot_timeout = GOYA_BOOT_FIT_REQ_TIMEOUT_USEC;
}
2589
/*
 * goya_init_firmware_loader - initialize the common FW-load manager.
 * @hdev: habanalabs device structure.
 *
 * Sets the image names, timeouts and BAR ids shared by both loading
 * protocols, then initializes either the dynamic or the static loader
 * according to the device properties.
 */
static void goya_init_firmware_loader(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct fw_load_mgr *fw_loader = &hdev->fw_loader;

	/* Common settings for both static and dynamic loading */
	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
	fw_loader->boot_fit_img.image_name = GOYA_BOOT_FIT_FILE;
	fw_loader->linux_img.image_name = GOYA_LINUX_FW_FILE;
	fw_loader->cpu_timeout = GOYA_CPU_TIMEOUT_USEC;
	fw_loader->boot_fit_timeout = GOYA_BOOT_FIT_REQ_TIMEOUT_USEC;
	fw_loader->skip_bmc = false;
	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
	fw_loader->dram_bar_id = DDR_BAR_ID;

	if (prop->dynamic_fw_load)
		goya_init_dynamic_firmware_loader(hdev);
	else
		goya_init_static_firmware_loader(hdev);
}
2610
/*
 * goya_init_cpu - initialize the embedded device CPU (boot firmware).
 * @hdev: habanalabs device structure.
 *
 * Skipped entirely when the preboot FW component is not requested or the
 * CPU was already initialized.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_init_cpu(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int rc;

	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
		return 0;

	if (goya->hw_cap_initialized & HW_CAP_CPU)
		return 0;

	/*
	 * The device CPU works with 40-bit addresses, so point the DDR BAR
	 * at the DRAM physical base before talking to it.
	 */
	if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map DDR bar to DRAM base address\n");
		return -EIO;
	}

	rc = hl_fw_init_cpu(hdev);

	if (rc)
		return rc;

	goya->hw_cap_initialized |= HW_CAP_CPU;

	return 0;
}
2641
/*
 * goya_mmu_update_asid_hop0_addr - program the hop0 PTE address of an ASID.
 * @hdev: habanalabs device structure.
 * @asid: address space id whose hop0 address is configured.
 * @phys_addr: physical address of the ASID's hop0 page table.
 *
 * Writes the split physical address, kicks the MMU with the busy bit set,
 * and polls until the hardware clears it.
 *
 * Return: 0 on success, negative errno on poll timeout.
 */
static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
					u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	/* Bit 31 is the busy bit; H/W clears it when the update is done */
	WREG32(MMU_ASID_BUSY, 0x80000000 | asid);

	rc = hl_poll_timeout(
		hdev,
		MMU_ASID_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}
2673
2674 int goya_mmu_init(struct hl_device *hdev)
2675 {
2676 struct asic_fixed_properties *prop = &hdev->asic_prop;
2677 struct goya_device *goya = hdev->asic_specific;
2678 u64 hop0_addr;
2679 int rc, i;
2680
2681 if (!hdev->mmu_enable)
2682 return 0;
2683
2684 if (goya->hw_cap_initialized & HW_CAP_MMU)
2685 return 0;
2686
2687 hdev->dram_default_page_mapping = true;
2688
2689 for (i = 0 ; i < prop->max_asid ; i++) {
2690 hop0_addr = prop->mmu_pgt_addr +
2691 (i * prop->mmu_hop_table_size);
2692
2693 rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2694 if (rc) {
2695 dev_err(hdev->dev,
2696 "failed to set hop0 addr for asid %d\n", i);
2697 goto err;
2698 }
2699 }
2700
2701 goya->hw_cap_initialized |= HW_CAP_MMU;
2702
2703
2704 WREG32(mmSTLB_CACHE_INV_BASE_39_8,
2705 lower_32_bits(MMU_CACHE_MNG_ADDR >> 8));
2706 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2707
2708
2709 WREG32_AND(mmSTLB_STLB_FEATURE_EN,
2710 (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
2711
2712 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR | MMU_OP_PHYS_PACK);
2713
2714 WREG32(mmMMU_MMU_ENABLE, 1);
2715 WREG32(mmMMU_SPI_MASK, 0xF);
2716
2717 return 0;
2718
2719 err:
2720 return rc;
2721 }
2722
2723
2724
2725
2726
2727
2728
2729
2730
/*
 * goya_hw_init - main device initialization function.
 * @hdev: habanalabs device structure.
 *
 * Initializes the device CPU, golden registers, MMU, security, all QMANs
 * (DMA, MME, TPC), the timestamp counter and MSI-X, in that order.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_hw_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	/* Perform a read from the device to make sure the device is up */
	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);

	/*
	 * Mark the H/W state as dirty so a subsequent load knows a reset
	 * is needed before re-initializing.
	 */
	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);

	rc = goya_init_cpu(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU\n");
		return rc;
	}

	goya_tpc_mbist_workaround(hdev);

	goya_init_golden_registers(hdev);

	/*
	 * Point the DDR BAR at the MMU page tables (aligned down to the
	 * BAR size) before initializing the MMU.
	 */
	if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
			~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map DDR bar to MMU page tables\n");
		return -EIO;
	}

	rc = goya_mmu_init(hdev);
	if (rc)
		return rc;

	goya_init_security(hdev);

	goya_init_dma_qmans(hdev);

	goya_init_mme_qmans(hdev);

	goya_init_tpc_qmans(hdev);

	goya_enable_timestamp(hdev);

	/* MSI-X must be enabled before the CPU queues are initialized */
	rc = goya_enable_msix(hdev);
	if (rc)
		goto disable_queues;

	/* Perform a read from the device to flush all configuration */
	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);

	return 0;

disable_queues:
	goya_disable_internal_queues(hdev);
	goya_disable_external_queues(hdev);

	return rc;
}
2798
/*
 * goya_hw_fini - reset the device.
 * @hdev: habanalabs device structure.
 * @hard_reset: true for a full-chip reset (including CPU), false for a
 *              DMA/MME/TPC-only soft reset.
 * @fw_reset: unused on Goya (present for the common callback signature).
 *
 * After issuing the reset, waits a fixed time and then checks the boot
 * FSM status; a non-idle state is reported as a timeout. For hard reset,
 * the boot sequencer is restarted so the device boots again.
 */
static void goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 reset_timeout_ms, cpu_timeout_ms, status;

	if (hdev->pldm) {
		reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
	} else {
		reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
	}

	if (hard_reset) {
		/*
		 * Tell the device CPU to go to WFE and signal it via the
		 * halt-machine event, then give it time to comply before
		 * resetting the chip.
		 */
		WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_HALT_MACHINE);

		msleep(cpu_timeout_ms);

		goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
		goya_disable_clk_rlx(hdev);
		goya_set_pll_refclk(hdev);

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
		dev_dbg(hdev->dev,
			"Issued HARD reset command, going to wait %dms\n",
			reset_timeout_ms);
	} else {
		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
		dev_dbg(hdev->dev,
			"Issued SOFT reset command, going to wait %dms\n",
			reset_timeout_ms);
	}

	/*
	 * Fixed sleep instead of polling: the register cannot be read
	 * reliably while the chip is mid-reset.
	 */
	msleep(reset_timeout_ms);

	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
		dev_err(hdev->dev,
			"Timeout while waiting for device to reset 0x%x\n",
			status);

	if (!hard_reset && goya) {
		/* Soft reset only clears the engine capability bits */
		goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
						HW_CAP_GOLDEN | HW_CAP_TPC);
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
				GOYA_ASYNC_EVENT_ID_SOFT_RESET);
		return;
	}

	/* Restart the boot sequencer so the device boots again */
	WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START,
		1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT);

	WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
			0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);

	if (goya) {
		goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
				HW_CAP_DDR_0 | HW_CAP_DDR_1 |
				HW_CAP_DMA | HW_CAP_MME |
				HW_CAP_MMU | HW_CAP_TPC_MBIST |
				HW_CAP_GOLDEN | HW_CAP_TPC);

		memset(goya->events_stat, 0, sizeof(goya->events_stat));
	}
}
2875
2876 int goya_suspend(struct hl_device *hdev)
2877 {
2878 int rc;
2879
2880 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
2881 if (rc)
2882 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
2883
2884 return rc;
2885 }
2886
/*
 * goya_resume - restore PCI iATU configuration after system resume.
 * @hdev: habanalabs device structure.
 *
 * Return: 0 on success, negative errno otherwise.
 */
int goya_resume(struct hl_device *hdev)
{
	return goya_init_iatu(hdev);
}
2891
/*
 * goya_mmap - map a coherent DMA allocation into user space.
 * @hdev: habanalabs device structure.
 * @vma: user VMA describing the requested mapping.
 * @cpu_addr: kernel virtual address of the allocation.
 * @dma_addr: device DMA address (includes the HOST_PHYS_BASE offset).
 * @size: size of the mapping in bytes.
 *
 * The HOST_PHYS_BASE offset is stripped before calling
 * dma_mmap_coherent() since the DMA API works with the raw bus address.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
	int rc;

	/* Pin the mapping and keep it out of core dumps and forks */
	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE;

	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
				(dma_addr - HOST_PHYS_BASE), size);
	if (rc)
		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);

	return rc;
}
2907
/*
 * goya_ring_doorbell - write a new producer index to a queue's doorbell.
 * @hdev: habanalabs device structure.
 * @hw_queue_id: id of the H/W queue whose PI is updated.
 * @pi: new producer index value.
 *
 * For the CPU queue, an additional GIC event is raised (after a memory
 * barrier) to wake the device CPU.
 */
void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
{
	u32 db_reg_offset, db_value;

	switch (hw_queue_id) {
	case GOYA_QUEUE_ID_DMA_0:
		db_reg_offset = mmDMA_QM_0_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_1:
		db_reg_offset = mmDMA_QM_1_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_2:
		db_reg_offset = mmDMA_QM_2_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_3:
		db_reg_offset = mmDMA_QM_3_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_4:
		db_reg_offset = mmDMA_QM_4_PQ_PI;
		break;

	case GOYA_QUEUE_ID_CPU_PQ:
		db_reg_offset = mmCPU_IF_PF_PQ_PI;
		break;

	case GOYA_QUEUE_ID_MME:
		db_reg_offset = mmMME_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC0:
		db_reg_offset = mmTPC0_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC1:
		db_reg_offset = mmTPC1_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC2:
		db_reg_offset = mmTPC2_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC3:
		db_reg_offset = mmTPC3_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC4:
		db_reg_offset = mmTPC4_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC5:
		db_reg_offset = mmTPC5_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC6:
		db_reg_offset = mmTPC6_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC7:
		db_reg_offset = mmTPC7_QM_PQ_PI;
		break;

	default:
		/* Should never get here */
		dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
			hw_queue_id);
		return;
	}

	db_value = pi;

	/* ring the doorbell */
	WREG32(db_reg_offset, db_value);

	if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ) {
		/* make sure device CPU will read latest data from host */
		mb();
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_PI_UPDATE);
	}
}
2992
/*
 * goya_pqe_write - copy a buffer descriptor into a PQ entry.
 * @hdev: habanalabs device structure.
 * @pqe: destination PQ entry (device-mapped memory).
 * @bd: buffer descriptor to copy.
 */
void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
{
	/* The destination is I/O memory, hence memcpy_toio() */
	memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
}
2998
2999 static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3000 dma_addr_t *dma_handle, gfp_t flags)
3001 {
3002 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3003 dma_handle, flags);
3004
3005
3006 if (kernel_addr)
3007 *dma_handle += HOST_PHYS_BASE;
3008
3009 return kernel_addr;
3010 }
3011
3012 static void goya_dma_free_coherent(struct hl_device *hdev, size_t size,
3013 void *cpu_addr, dma_addr_t dma_handle)
3014 {
3015
3016 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3017
3018 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3019 }
3020
/*
 * goya_scrub_device_mem - device-memory scrubbing callback; no-op on Goya.
 * @hdev: habanalabs device structure.
 *
 * Return: always 0.
 */
int goya_scrub_device_mem(struct hl_device *hdev)
{
	return 0;
}
3025
/*
 * goya_get_int_queue_base - get the SRAM base of an internal queue.
 * @hdev: habanalabs device structure.
 * @queue_id: internal queue (MME or TPC0-7) whose base is requested.
 * @dma_handle: filled with the device address of the queue base.
 * @queue_len: filled with the queue length.
 *
 * Internal queues live in on-device SRAM, accessed through the SRAM/CFG
 * BAR; both the BAR-relative pointer and the device address are advanced
 * by the per-queue offset.
 *
 * Return: host-mapped pointer to the queue base, or NULL for an invalid
 * queue id.
 */
void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
				dma_addr_t *dma_handle, u16 *queue_len)
{
	void *base;
	u32 offset;

	*dma_handle = hdev->asic_prop.sram_base_address;

	base = (__force void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];

	switch (queue_id) {
	case GOYA_QUEUE_ID_MME:
		offset = MME_QMAN_BASE_OFFSET;
		*queue_len = MME_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC0:
		offset = TPC0_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC1:
		offset = TPC1_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC2:
		offset = TPC2_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC3:
		offset = TPC3_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC4:
		offset = TPC4_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC5:
		offset = TPC5_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC6:
		offset = TPC6_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC7:
		offset = TPC7_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	default:
		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
		return NULL;
	}

	base += offset;
	*dma_handle += offset;

	return base;
}
3083
/*
 * goya_send_job_on_qman0 - execute a driver job on DMA QMAN0.
 * @hdev: habanalabs device structure.
 * @job: command-submission job whose patched CB is executed.
 *
 * Appends a fence packet at the end of the job's CB, temporarily opens
 * QMAN0 security, sends the CB and polls host memory until the fence
 * value arrives. The device must be idle when this is called.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct packet_msg_prot *fence_pkt;
	u32 *fence_ptr;
	dma_addr_t fence_dma_addr;
	struct hl_cb *cb;
	u32 tmp, timeout;
	int rc;

	if (hdev->pldm)
		timeout = GOYA_PLDM_QMAN0_TIMEOUT_USEC;
	else
		timeout = HL_DEVICE_TIMEOUT_USEC;

	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
		dev_err_ratelimited(hdev->dev,
			"Can't send driver job on QMAN0 because the device is not idle\n");
		return -EBUSY;
	}

	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate fence memory for QMAN0\n");
		return -ENOMEM;
	}

	goya_qman0_set_security(hdev, true);

	cb = job->patched_cb;

	/* The fence packet occupies the last slot of the CB */
	fence_pkt = cb->kernel_address +
			job->job_cb_size - sizeof(struct packet_msg_prot);

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(GOYA_QMAN0_FENCE_VAL);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
					job->job_cb_size, cb->bus_address);
	if (rc) {
		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
		goto free_fence_ptr;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
				(tmp == GOYA_QMAN0_FENCE_VAL), 1000,
				timeout, true);

	hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
		goto free_fence_ptr;
	}

free_fence_ptr:
	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);

	/* Always restore QMAN0 security, whether the job succeeded or not */
	goya_qman0_set_security(hdev, false);

	return rc;
}
3150
3151 int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
3152 u32 timeout, u64 *result)
3153 {
3154 struct goya_device *goya = hdev->asic_specific;
3155
3156 if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
3157 if (result)
3158 *result = 0;
3159 return 0;
3160 }
3161
3162 if (!timeout)
3163 timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC;
3164
3165 return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
3166 timeout, result);
3167 }
3168
/*
 * goya_test_queue - sanity-test a single external H/W queue.
 * @hdev: habanalabs device structure.
 * @hw_queue_id: queue to test.
 *
 * Sends a MSG_PROT fence packet on the queue and polls host memory until
 * the fence value lands, proving the queue can execute packets.
 *
 * Return: 0 on success, -ENOMEM/-EIO or other negative errno on failure.
 */
int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
{
	struct packet_msg_prot *fence_pkt;
	dma_addr_t pkt_dma_addr;
	u32 fence_val, tmp;
	dma_addr_t fence_dma_addr;
	u32 *fence_ptr;
	int rc;

	fence_val = GOYA_QMAN0_FENCE_VAL;

	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate memory for H/W queue %d testing\n",
			hw_queue_id);
		return -ENOMEM;
	}

	*fence_ptr = 0;

	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
						&pkt_dma_addr);
	if (!fence_pkt) {
		dev_err(hdev->dev,
			"Failed to allocate packet for H/W queue %d testing\n",
			hw_queue_id);
		rc = -ENOMEM;
		goto free_fence_ptr;
	}

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(fence_val);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
					sizeof(struct packet_msg_prot),
					pkt_dma_addr);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to send fence packet to H/W queue %d\n",
			hw_queue_id);
		goto free_pkt;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
					1000, GOYA_TEST_QUEUE_WAIT_USEC, true);

	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev,
			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
		rc = -EIO;
	}

free_pkt:
	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
free_fence_ptr:
	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
	return rc;
}
3235
3236 int goya_test_cpu_queue(struct hl_device *hdev)
3237 {
3238 struct goya_device *goya = hdev->asic_specific;
3239
3240
3241
3242
3243
3244 if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
3245 return 0;
3246
3247 return hl_fw_test_cpu_queue(hdev);
3248 }
3249
3250 int goya_test_queues(struct hl_device *hdev)
3251 {
3252 int i, rc, ret_val = 0;
3253
3254 for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
3255 rc = goya_test_queue(hdev, i);
3256 if (rc)
3257 ret_val = -EINVAL;
3258 }
3259
3260 return ret_val;
3261 }
3262
3263 static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3264 gfp_t mem_flags, dma_addr_t *dma_handle)
3265 {
3266 void *kernel_addr;
3267
3268 if (size > GOYA_DMA_POOL_BLK_SIZE)
3269 return NULL;
3270
3271 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3272
3273
3274 if (kernel_addr)
3275 *dma_handle += HOST_PHYS_BASE;
3276
3277 return kernel_addr;
3278 }
3279
3280 static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
3281 dma_addr_t dma_addr)
3282 {
3283
3284 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3285
3286 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3287 }
3288
3289 void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
3290 dma_addr_t *dma_handle)
3291 {
3292 void *vaddr;
3293
3294 vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3295 *dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
3296 VA_CPU_ACCESSIBLE_MEM_ADDR;
3297
3298 return vaddr;
3299 }
3300
/* Return a buffer to the CPU-accessible DMA pool (thin hl_fw wrapper). */
void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
					void *vaddr)
{
	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
3306
/*
 * goya_get_dma_desc_list_size() - compute the bytes of LIN_DMA packets needed
 * to transfer a DMA-mapped scatter-gather table.
 * @hdev: habanalabs device structure.
 * @sgt: the DMA-mapped scatter-gather table.
 *
 * Counts one descriptor per run of SG entries that are physically contiguous
 * and together no larger than DMA_MAX_TRANSFER_SIZE — the same merging that
 * goya_patch_dma_packet() performs when actually building the packets.
 *
 * Return: total size in bytes of the required packet_lin_dma entries.
 */
u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
{
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t addr, addr_next;

	dma_desc_cnt = 0;

	for_each_sgtable_dma_sg(sgt, sg, count) {
		len = sg_dma_len(sg);
		addr = sg_dma_address(sg);

		/* A zero-length entry ends the DMA-mapped part of the list */
		if (len == 0)
			break;

		/* Fold subsequent entries that are contiguous with this one */
		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((addr + len == addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		dma_desc_cnt++;
	}

	return dma_desc_cnt * sizeof(struct packet_lin_dma);
}
3346
/*
 * goya_pin_memory_before_cs() - pin and DMA-map the host memory referenced by
 * a LIN_DMA packet ahead of CS patching.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state; new pinnings are added to job_userptr_list.
 * @user_dma_pkt: the user's LIN_DMA packet (supplies the transfer size).
 * @addr: host virtual address of the user buffer.
 * @dir: DMA data direction for the mapping.
 *
 * If the exact range is already pinned for this job, only the patched CB size
 * accounting is updated.
 *
 * Return: 0 on success, -ENOMEM on allocation failure, or the error from
 * pinning / DMA-mapping.
 */
static int goya_pin_memory_before_cs(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				u64 addr, enum dma_data_direction dir)
{
	struct hl_userptr *userptr;
	int rc;

	/* Reuse an existing pinning of this exact range, if present */
	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
			parser->job_userptr_list, &userptr))
		goto already_pinned;

	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
	if (!userptr)
		return -ENOMEM;

	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
				userptr);
	if (rc)
		goto free_userptr;

	list_add_tail(&userptr->job_node, parser->job_userptr_list);

	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
	if (rc) {
		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
		goto unpin_memory;
	}

	userptr->dma_mapped = true;
	userptr->dir = dir;

already_pinned:
	/* Account for the LIN_DMA descriptors this SG list expands into */
	parser->patched_cb_size +=
			goya_get_dma_desc_list_size(hdev, userptr->sgt);

	return 0;

unpin_memory:
	list_del(&userptr->job_node);
	hl_unpin_host_memory(hdev, userptr);
free_userptr:
	kfree(userptr);
	return rc;
}
3392
3393 static int goya_validate_dma_pkt_host(struct hl_device *hdev,
3394 struct hl_cs_parser *parser,
3395 struct packet_lin_dma *user_dma_pkt)
3396 {
3397 u64 device_memory_addr, addr;
3398 enum dma_data_direction dir;
3399 enum hl_goya_dma_direction user_dir;
3400 bool sram_addr = true;
3401 bool skip_host_mem_pin = false;
3402 bool user_memset;
3403 u32 ctl;
3404 int rc = 0;
3405
3406 ctl = le32_to_cpu(user_dma_pkt->ctl);
3407
3408 user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3409 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3410
3411 user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3412 GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3413
3414 switch (user_dir) {
3415 case HL_DMA_HOST_TO_DRAM:
3416 dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
3417 dir = DMA_TO_DEVICE;
3418 sram_addr = false;
3419 addr = le64_to_cpu(user_dma_pkt->src_addr);
3420 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3421 if (user_memset)
3422 skip_host_mem_pin = true;
3423 break;
3424
3425 case HL_DMA_DRAM_TO_HOST:
3426 dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
3427 dir = DMA_FROM_DEVICE;
3428 sram_addr = false;
3429 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3430 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3431 break;
3432
3433 case HL_DMA_HOST_TO_SRAM:
3434 dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
3435 dir = DMA_TO_DEVICE;
3436 addr = le64_to_cpu(user_dma_pkt->src_addr);
3437 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3438 if (user_memset)
3439 skip_host_mem_pin = true;
3440 break;
3441
3442 case HL_DMA_SRAM_TO_HOST:
3443 dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
3444 dir = DMA_FROM_DEVICE;
3445 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3446 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3447 break;
3448 default:
3449 dev_err(hdev->dev, "DMA direction %d is unsupported/undefined\n", user_dir);
3450 return -EFAULT;
3451 }
3452
3453 if (sram_addr) {
3454 if (!hl_mem_area_inside_range(device_memory_addr,
3455 le32_to_cpu(user_dma_pkt->tsize),
3456 hdev->asic_prop.sram_user_base_address,
3457 hdev->asic_prop.sram_end_address)) {
3458
3459 dev_err(hdev->dev,
3460 "SRAM address 0x%llx + 0x%x is invalid\n",
3461 device_memory_addr,
3462 user_dma_pkt->tsize);
3463 return -EFAULT;
3464 }
3465 } else {
3466 if (!hl_mem_area_inside_range(device_memory_addr,
3467 le32_to_cpu(user_dma_pkt->tsize),
3468 hdev->asic_prop.dram_user_base_address,
3469 hdev->asic_prop.dram_end_address)) {
3470
3471 dev_err(hdev->dev,
3472 "DRAM address 0x%llx + 0x%x is invalid\n",
3473 device_memory_addr,
3474 user_dma_pkt->tsize);
3475 return -EFAULT;
3476 }
3477 }
3478
3479 if (skip_host_mem_pin)
3480 parser->patched_cb_size += sizeof(*user_dma_pkt);
3481 else {
3482 if ((dir == DMA_TO_DEVICE) &&
3483 (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
3484 dev_err(hdev->dev,
3485 "Can't DMA from host on queue other then 1\n");
3486 return -EFAULT;
3487 }
3488
3489 rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3490 addr, dir);
3491 }
3492
3493 return rc;
3494 }
3495
3496 static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
3497 struct hl_cs_parser *parser,
3498 struct packet_lin_dma *user_dma_pkt)
3499 {
3500 u64 sram_memory_addr, dram_memory_addr;
3501 enum hl_goya_dma_direction user_dir;
3502 u32 ctl;
3503
3504 ctl = le32_to_cpu(user_dma_pkt->ctl);
3505 user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3506 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3507
3508 if (user_dir == HL_DMA_DRAM_TO_SRAM) {
3509 dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
3510 dram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3511 sram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3512 } else {
3513 dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
3514 sram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3515 dram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3516 }
3517
3518 if (!hl_mem_area_inside_range(sram_memory_addr,
3519 le32_to_cpu(user_dma_pkt->tsize),
3520 hdev->asic_prop.sram_user_base_address,
3521 hdev->asic_prop.sram_end_address)) {
3522 dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
3523 sram_memory_addr, user_dma_pkt->tsize);
3524 return -EFAULT;
3525 }
3526
3527 if (!hl_mem_area_inside_range(dram_memory_addr,
3528 le32_to_cpu(user_dma_pkt->tsize),
3529 hdev->asic_prop.dram_user_base_address,
3530 hdev->asic_prop.dram_end_address)) {
3531 dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
3532 dram_memory_addr, user_dma_pkt->tsize);
3533 return -EFAULT;
3534 }
3535
3536 parser->patched_cb_size += sizeof(*user_dma_pkt);
3537
3538 return 0;
3539 }
3540
/*
 * goya_validate_dma_pkt_no_mmu() - validate a LIN_DMA packet when the device
 * MMU is disabled.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state.
 * @user_dma_pkt: the user's LIN_DMA packet.
 *
 * Dispatches to the SRAM/DRAM-only or host-involving validation path based on
 * the packet's direction field.
 *
 * Return: 0 on success, negative errno on a bad packet.
 */
static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
{
	enum hl_goya_dma_direction user_dir;
	u32 ctl;
	int rc;

	dev_dbg(hdev->dev, "DMA packet details:\n");
	dev_dbg(hdev->dev, "source == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->src_addr));
	dev_dbg(hdev->dev, "destination == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->dst_addr));
	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));

	ctl = le32_to_cpu(user_dma_pkt->ctl);
	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;

	/*
	 * Reject zero-size transfers outright; per the message below, the
	 * engine apparently misbehaves on them badly enough to need a reset.
	 */
	if (user_dma_pkt->tsize == 0) {
		dev_err(hdev->dev,
			"Got DMA with size 0, might reset the device\n");
		return -EINVAL;
	}

	if ((user_dir == HL_DMA_DRAM_TO_SRAM) || (user_dir == HL_DMA_SRAM_TO_DRAM))
		rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
	else
		rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);

	return rc;
}
3577
/*
 * goya_validate_dma_pkt_mmu() - validate a LIN_DMA packet when the device MMU
 * is enabled.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state; patched_cb_size is advanced on success.
 * @user_dma_pkt: the user's LIN_DMA packet.
 *
 * With the MMU on, addresses are device VAs and no host pinning/patching is
 * needed, so each packet contributes exactly its own size to the patched CB.
 *
 * Return: 0 on success, -EFAULT/-EINVAL on a bad packet.
 */
static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
{
	dev_dbg(hdev->dev, "DMA packet details:\n");
	dev_dbg(hdev->dev, "source == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->src_addr));
	dev_dbg(hdev->dev, "destination == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->dst_addr));
	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));

	/*
	 * A source inside the PMMU (host) address range means a DMA from host,
	 * which is only allowed on DMA queue 1 (same restriction enforced in
	 * the no-MMU path).
	 */
	if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
		hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
				le32_to_cpu(user_dma_pkt->tsize),
				hdev->asic_prop.pmmu.start_addr,
				hdev->asic_prop.pmmu.end_addr)) {
		dev_err(hdev->dev,
			"Can't DMA from host on queue other then 1\n");
		return -EFAULT;
	}

	if (user_dma_pkt->tsize == 0) {
		dev_err(hdev->dev,
			"Got DMA with size 0, might reset the device\n");
		return -EINVAL;
	}

	parser->patched_cb_size += sizeof(*user_dma_pkt);

	return 0;
}
3614
3615 static int goya_validate_wreg32(struct hl_device *hdev,
3616 struct hl_cs_parser *parser,
3617 struct packet_wreg32 *wreg_pkt)
3618 {
3619 struct goya_device *goya = hdev->asic_specific;
3620 u32 sob_start_addr, sob_end_addr;
3621 u16 reg_offset;
3622
3623 reg_offset = le32_to_cpu(wreg_pkt->ctl) &
3624 GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;
3625
3626 dev_dbg(hdev->dev, "WREG32 packet details:\n");
3627 dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
3628 dev_dbg(hdev->dev, "value == 0x%x\n",
3629 le32_to_cpu(wreg_pkt->value));
3630
3631 if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) {
3632 dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
3633 reg_offset);
3634 return -EPERM;
3635 }
3636
3637
3638
3639
3640
3641
3642 if (goya->hw_cap_initialized & HW_CAP_MMU)
3643 return 0;
3644
3645 sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
3646 sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);
3647
3648 if ((le32_to_cpu(wreg_pkt->value) < sob_start_addr) ||
3649 (le32_to_cpu(wreg_pkt->value) > sob_end_addr)) {
3650
3651 dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
3652 wreg_pkt->value);
3653 return -EPERM;
3654 }
3655
3656 return 0;
3657 }
3658
/*
 * goya_validate_cb() - walk a user command buffer and validate every packet.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state; patched_cb_size is computed here.
 * @is_mmu: true when the device MMU is enabled (selects the LIN_DMA
 *          validation flavor).
 *
 * Rejects forbidden packet types (WREG_BULK, MSG_PROT, CP_DMA, STOP) and
 * out-of-bounds/unknown packets, and accumulates the size the patched CB
 * will need, including the two trailing MSG_PROT packets that
 * goya_add_end_of_cb_packets() appends.
 *
 * Return: 0 on success, -EINVAL/-EPERM (or a LIN_DMA validation error) on
 * the first bad packet.
 */
static int goya_validate_cb(struct hl_device *hdev,
			struct hl_cs_parser *parser, bool is_mmu)
{
	u32 cb_parsed_length = 0;
	int rc = 0;

	parser->patched_cb_size = 0;

	/* cb_parsed_length can never exceed user_cb_size by more than one
	 * packet, thanks to the bound check after each size accumulation */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		struct goya_packet *user_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		pkt_size = goya_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_WREG_32:
			/*
			 * WREG32 is allowed only for the single register
			 * whitelisted by goya_validate_wreg32(); anything
			 * else is rejected there.
			 */
			rc = goya_validate_wreg32(hdev,
				parser, (struct packet_wreg32 *) user_pkt);
			parser->patched_cb_size += pkt_size;
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_LIN_DMA:
			if (is_mmu)
				rc = goya_validate_dma_pkt_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			else
				rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			break;

		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_FENCE:
		case PACKET_NOP:
			/* Benign packets are copied as-is during patching */
			parser->patched_cb_size += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	/*
	 * Reserve room for the two MSG_PROT packets appended at the end of
	 * the CB (see goya_add_end_of_cb_packets()).
	 */
	parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;

	return rc;
}
3765
/*
 * goya_patch_dma_packet() - expand a single user LIN_DMA packet into the
 * patched CB.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state (provides the job's pinned userptr list).
 * @user_dma_pkt: the original user packet.
 * @new_dma_pkt: destination in the patched CB (array of packets).
 * @new_dma_pkt_size: out - total bytes written to @new_dma_pkt.
 *
 * SRAM<->DRAM packets and zero-size packets are copied through unchanged.
 * Host-involving packets are expanded into one LIN_DMA packet per merged run
 * of contiguous SG entries (same merge rule as goya_get_dma_desc_list_size()).
 *
 * Return: 0 on success, -EFAULT if the host range was never pinned or the SG
 * list is empty.
 */
static int goya_patch_dma_packet(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				struct packet_lin_dma *new_dma_pkt,
				u32 *new_dma_pkt_size)
{
	struct hl_userptr *userptr;
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t dma_addr, dma_addr_next;
	enum hl_goya_dma_direction user_dir;
	u64 device_memory_addr, addr;
	enum dma_data_direction dir;
	struct sg_table *sgt;
	bool skip_host_mem_pin = false;
	bool user_memset;
	u32 user_rdcomp_mask, user_wrcomp_mask, ctl;

	ctl = le32_to_cpu(user_dma_pkt->ctl);

	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;

	user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

	/* Device-only or zero-size transfers need no expansion */
	if ((user_dir == HL_DMA_DRAM_TO_SRAM) || (user_dir == HL_DMA_SRAM_TO_DRAM) ||
			(user_dma_pkt->tsize == 0)) {
		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
		*new_dma_pkt_size = sizeof(*new_dma_pkt);
		return 0;
	}

	if ((user_dir == HL_DMA_HOST_TO_DRAM) || (user_dir == HL_DMA_HOST_TO_SRAM)) {
		addr = le64_to_cpu(user_dma_pkt->src_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
		dir = DMA_TO_DEVICE;
		if (user_memset)
			skip_host_mem_pin = true;
	} else {
		addr = le64_to_cpu(user_dma_pkt->dst_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		dir = DMA_FROM_DEVICE;
	}

	/* The range must have been pinned by goya_pin_memory_before_cs() */
	if ((!skip_host_mem_pin) &&
		(hl_userptr_is_pinned(hdev, addr,
			le32_to_cpu(user_dma_pkt->tsize),
			parser->job_userptr_list, &userptr) == false)) {
		/* NOTE(review): tsize is __le32 and is printed here without
		 * le32_to_cpu — wrong on big-endian hosts */
		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
				addr, user_dma_pkt->tsize);
		return -EFAULT;
	}

	/* Host-directed memset carries no host buffer; copy through as-is */
	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
		*new_dma_pkt_size = sizeof(*user_dma_pkt);
		return 0;
	}

	/* Completion flags are restored only on the last descriptor below */
	user_rdcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK;

	user_wrcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK;

	sgt = userptr->sgt;
	dma_desc_cnt = 0;

	for_each_sgtable_dma_sg(sgt, sg, count) {
		len = sg_dma_len(sg);
		dma_addr = sg_dma_address(sg);

		/* A zero-length entry ends the DMA-mapped part of the list */
		if (len == 0)
			break;

		/* Merge contiguous entries up to DMA_MAX_TRANSFER_SIZE */
		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			dma_addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((dma_addr + len == dma_addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		/* Keep the engine barrier only on the first descriptor and
		 * strip completion flags from all of them for now */
		ctl = le32_to_cpu(user_dma_pkt->ctl);
		if (likely(dma_desc_cnt))
			ctl &= ~GOYA_PKT_CTL_EB_MASK;
		ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
				GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
		new_dma_pkt->ctl = cpu_to_le32(ctl);
		new_dma_pkt->tsize = cpu_to_le32((u32) len);

		if (dir == DMA_TO_DEVICE) {
			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
		} else {
			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
		}

		/* A memset repeats the same device address for every chunk */
		if (!user_memset)
			device_memory_addr += len;
		dma_desc_cnt++;
		new_dma_pkt++;
	}

	if (!dma_desc_cnt) {
		dev_err(hdev->dev,
			"Error of 0 SG entries when patching DMA packet\n");
		return -EFAULT;
	}

	/* Re-apply the user's completion flags on the final descriptor only */
	new_dma_pkt--;
	new_dma_pkt->ctl |= cpu_to_le32(user_rdcomp_mask | user_wrcomp_mask);

	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);

	return 0;
}
3895
/*
 * goya_patch_cb() - build the patched CB from the user CB, packet by packet.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state; user_cb is read, patched_cb is written.
 *
 * Benign packets are copied verbatim; LIN_DMA packets are expanded by
 * goya_patch_dma_packet(); forbidden/invalid packets abort the patching.
 * This mirrors the walk done earlier by goya_validate_cb().
 *
 * Return: 0 on success, negative errno on the first bad packet.
 */
static int goya_patch_cb(struct hl_device *hdev,
				struct hl_cs_parser *parser)
{
	u32 cb_parsed_length = 0;
	u32 cb_patched_cur_length = 0;
	int rc = 0;

	/* Source and destination cursors advance independently, since a
	 * LIN_DMA packet may expand to a different size in the patched CB */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		u32 new_pkt_size = 0;
		struct goya_packet *user_pkt, *kernel_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
		kernel_pkt = parser->patched_cb->kernel_address +
					cb_patched_cur_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		pkt_size = goya_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_LIN_DMA:
			rc = goya_patch_dma_packet(hdev, parser,
					(struct packet_lin_dma *) user_pkt,
					(struct packet_lin_dma *) kernel_pkt,
					&new_pkt_size);
			cb_patched_cur_length += new_pkt_size;
			break;

		case PACKET_WREG_32:
			/* Copy first, then validate the kernel-side copy so a
			 * user cannot race a modification after the check */
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			rc = goya_validate_wreg32(hdev, parser,
					(struct packet_wreg32 *) kernel_pkt);
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_FENCE:
		case PACKET_NOP:
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	return rc;
}
3993
/*
 * goya_parse_cb_mmu() - parse a user CB for an external queue with the MMU
 * enabled.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state; patched_cb is produced on success.
 *
 * Under the MMU, LIN_DMA packets are not expanded, so the patched CB is the
 * user CB copied verbatim plus room for the two trailing MSG_PROT packets.
 * The copy is validated in place (it is validated rather than the original
 * so the user cannot modify it after validation).
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_parse_cb_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 handle;
	u32 patched_cb_size;
	struct hl_cb *user_cb;
	int rc;

	/*
	 * Exact final size is known up front: the user CB as-is, plus the
	 * two MSG_PROT packets appended by goya_add_end_of_cb_packets().
	 */
	parser->patched_cb_size = parser->user_cb_size +
			sizeof(struct packet_msg_prot) * 2;

	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&handle);

	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n",
			rc);
		return rc;
	}

	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);

	if (!parser->patched_cb) {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
		rc = -EFAULT;
		goto out;
	}

	/* Copy the user CB into the patched CB before validating it */
	memcpy(parser->patched_cb->kernel_address,
		parser->user_cb->kernel_address,
		parser->user_cb_size);

	patched_cb_size = parser->patched_cb_size;

	/* Validate the copy by temporarily pointing user_cb at it */
	user_cb = parser->user_cb;
	parser->user_cb = parser->patched_cb;
	rc = goya_validate_cb(hdev, parser, true);
	parser->user_cb = user_cb;

	if (rc) {
		hl_cb_put(parser->patched_cb);
		goto out;
	}

	/* Validation recomputes the size; it must match the prediction */
	if (patched_cb_size != parser->patched_cb_size) {
		dev_err(hdev->dev, "user CB size mismatch\n");
		hl_cb_put(parser->patched_cb);
		rc = -EINVAL;
		goto out;
	}

out:
	/*
	 * The mem-mgr handle is always released here; on success the CB
	 * itself stays alive via the reference taken by hl_cb_get() above.
	 */
	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);

	return rc;
}
4068
/*
 * goya_parse_cb_no_mmu() - parse a user CB for an external queue with the MMU
 * disabled.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state; patched_cb is produced on success.
 *
 * Validates the user CB (which also computes the patched size, since LIN_DMA
 * packets may expand), allocates the patched CB, then patches into it. On any
 * failure the job's pinned userptr list is torn down.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_parse_cb_no_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 handle;
	int rc;

	rc = goya_validate_cb(hdev, parser, false);

	if (rc)
		goto free_userptr;

	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n", rc);
		goto free_userptr;
	}

	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);

	if (!parser->patched_cb) {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
		rc = -EFAULT;
		goto out;
	}

	rc = goya_patch_cb(hdev, parser);

	if (rc)
		hl_cb_put(parser->patched_cb);

out:
	/*
	 * The mem-mgr handle is always released here (only reachable once
	 * hl_cb_create() succeeded); on success the CB stays alive via the
	 * reference taken by hl_cb_get() above.
	 */
	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);

free_userptr:
	if (rc)
		hl_userptr_delete_list(hdev, parser->job_userptr_list);
	return rc;
}
4116
4117 static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
4118 struct hl_cs_parser *parser)
4119 {
4120 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4121 struct goya_device *goya = hdev->asic_specific;
4122
4123 if (goya->hw_cap_initialized & HW_CAP_MMU)
4124 return 0;
4125
4126
4127 if (hl_mem_area_inside_range(
4128 (u64) (uintptr_t) parser->user_cb,
4129 parser->user_cb_size,
4130 asic_prop->sram_user_base_address,
4131 asic_prop->sram_end_address))
4132 return 0;
4133
4134 if (hl_mem_area_inside_range(
4135 (u64) (uintptr_t) parser->user_cb,
4136 parser->user_cb_size,
4137 asic_prop->dram_user_base_address,
4138 asic_prop->dram_end_address))
4139 return 0;
4140
4141 dev_err(hdev->dev,
4142 "Internal CB address 0x%px + 0x%x is not in SRAM nor in DRAM\n",
4143 parser->user_cb, parser->user_cb_size);
4144
4145 return -EFAULT;
4146 }
4147
4148 int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4149 {
4150 struct goya_device *goya = hdev->asic_specific;
4151
4152 if (parser->queue_type == QUEUE_TYPE_INT)
4153 return goya_parse_cb_no_ext_queue(hdev, parser);
4154
4155 if (goya->hw_cap_initialized & HW_CAP_MMU)
4156 return goya_parse_cb_mmu(hdev, parser);
4157 else
4158 return goya_parse_cb_no_mmu(hdev, parser);
4159 }
4160
4161 void goya_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
4162 u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
4163 u32 msix_vec, bool eb)
4164 {
4165 struct packet_msg_prot *cq_pkt;
4166 u32 tmp;
4167
4168 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
4169
4170 tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
4171 (1 << GOYA_PKT_CTL_EB_SHIFT) |
4172 (1 << GOYA_PKT_CTL_MB_SHIFT);
4173 cq_pkt->ctl = cpu_to_le32(tmp);
4174 cq_pkt->value = cpu_to_le32(cq_val);
4175 cq_pkt->addr = cpu_to_le64(cq_addr);
4176
4177 cq_pkt++;
4178
4179 tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
4180 (1 << GOYA_PKT_CTL_MB_SHIFT);
4181 cq_pkt->ctl = cpu_to_le32(tmp);
4182 cq_pkt->value = cpu_to_le32(msix_vec & 0x7FF);
4183 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF);
4184 }
4185
/* Publish the new event-queue consumer index to the device CPU. */
void goya_update_eq_ci(struct hl_device *hdev, u32 val)
{
	WREG32(mmCPU_EQ_CI, val);
}
4190
/* Intentionally empty: Goya has no phase topology to restore. */
void goya_restore_phase_topology(struct hl_device *hdev)
{

}
4195
/*
 * goya_clear_sm_regs() - zero all sync-manager SOB and monitor-status
 * registers.
 * @hdev: habanalabs device structure.
 */
static void goya_clear_sm_regs(struct hl_device *hdev)
{
	int i, num_of_sob_in_longs, num_of_mon_in_longs;

	num_of_sob_in_longs =
		((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4);

	num_of_mon_in_longs =
		((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4);

	for (i = 0 ; i < num_of_sob_in_longs ; i += 4)
		WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0);

	for (i = 0 ; i < num_of_mon_in_longs ; i += 4)
		WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0);

	/* Read back one register to flush the posted writes above */
	i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
}
4215
/* Debugfs DMA read is not implemented on Goya; always fails with -EPERM. */
static int goya_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
{
	dev_err(hdev->dev, "Reading via DMA is unimplemented yet\n");
	return -EPERM;
}
4221
/*
 * Read a PTE through the DDR BAR. @addr must fall inside the window the BAR
 * currently maps (tracked in goya->ddr_bar_cur_addr). Returns U64_MAX while
 * a hard reset is pending, since the device is not safely accessible then.
 */
static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
{
	struct goya_device *goya = hdev->asic_specific;

	if (hdev->reset_info.hard_reset_pending)
		return U64_MAX;

	return readq(hdev->pcie_bar[DDR_BAR_ID] +
			(addr - goya->ddr_bar_cur_addr));
}
4232
/*
 * Write a PTE through the DDR BAR. @addr must fall inside the window the BAR
 * currently maps (tracked in goya->ddr_bar_cur_addr). Silently skipped while
 * a hard reset is pending.
 */
static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
	struct goya_device *goya = hdev->asic_specific;

	if (hdev->reset_info.hard_reset_pending)
		return;

	writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
			(addr - goya->ddr_bar_cur_addr));
}
4243
/*
 * _goya_get_event_desc() - map an async event type to its description string.
 * @event_type: the GOYA_ASYNC_EVENT_ID_* value.
 *
 * Some descriptions contain a "%d" placeholder; goya_get_event_desc() fills
 * it in with the engine/instance index. Unknown events yield "N/A".
 *
 * Return: a static description string (never NULL).
 */
static const char *_goya_get_event_desc(u16 event_type)
{
	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
		return "PCIe_if";
	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
		return "TPC%d_ecc";
	case GOYA_ASYNC_EVENT_ID_MME_ECC:
		return "MME_ecc";
	case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
		return "MME_ecc_ext";
	case GOYA_ASYNC_EVENT_ID_MMU_ECC:
		return "MMU_ecc";
	case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
		return "DMA_macro";
	case GOYA_ASYNC_EVENT_ID_DMA_ECC:
		return "DMA_ecc";
	case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
		return "CPU_if_ecc";
	case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
		return "PSOC_mem";
	case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
		return "PSOC_coresight";
	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
		return "SRAM%d";
	case GOYA_ASYNC_EVENT_ID_GIC500:
		return "GIC500";
	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
		return "PLL%d";
	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
		return "AXI_ecc";
	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
		return "L2_ram_ecc";
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
		return "PSOC_gpio_05_sw_reset";
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
		return "PSOC_gpio_10_vrhot_icrit";
	case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
		return "PCIe_dec";
	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
		return "TPC%d_dec";
	case GOYA_ASYNC_EVENT_ID_MME_WACS:
		return "MME_wacs";
	case GOYA_ASYNC_EVENT_ID_MME_WACSD:
		return "MME_wacsd";
	case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
		return "CPU_axi_splitter";
	case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
		return "PSOC_axi_dec";
	case GOYA_ASYNC_EVENT_ID_PSOC:
		return "PSOC";
	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
		return "TPC%d_krn_err";
	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
		return "TPC%d_cq";
	case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
		return "TPC%d_qm";
	case GOYA_ASYNC_EVENT_ID_MME_QM:
		return "MME_qm";
	case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
		return "MME_cq";
	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
		return "DMA%d_qm";
	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
		return "DMA%d_ch";
	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
		return "TPC%d_bmon_spmu";
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		return "DMA_bm_ch%d";
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
		return "POWER_ENV_S";
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
		return "POWER_ENV_E";
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
		return "THERMAL_ENV_S";
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		return "THERMAL_ENV_E";
	case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
		return "QUEUE_OUT_OF_SYNC";
	default:
		return "N/A";
	}
}
4355
/*
 * goya_get_event_desc() - build a printable description for an async event.
 * @event_type: GOYA_ASYNC_EVENT_ID_* value taken from the event queue entry.
 * @desc: output buffer receiving the formatted description.
 * @size: size of @desc in bytes.
 *
 * _goya_get_event_desc() returns a printf-style template string; for events
 * that exist per engine instance the template carries a placeholder that is
 * filled here with the engine index derived from the event id.  The divisors
 * below encode the spacing of consecutive per-engine event ids (e.g. TPC ECC
 * ids are assumed to be 3 apart, KRN_ERR ids 10 apart) - derived from the id
 * arithmetic, not visible here; confirm against goya_async_events.h.
 */
static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
{
	u8 index;

	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
		/* ECC event ids of consecutive TPCs are 3 apart */
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
		index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
		index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
		/* DEC event ids of consecutive TPCs are 3 apart */
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_DEC) / 3;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
		/* kernel-error event ids of consecutive TPCs are 10 apart */
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR) / 10;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
		index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_CMDQ;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
		index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_QM;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
		index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
		index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
		/* BMON/SPMU event ids of consecutive TPCs are 10 apart */
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
		/* template has no placeholder for this event */
		snprintf(desc, size, _goya_get_event_desc(event_type));
		break;
	default:
		snprintf(desc, size, _goya_get_event_desc(event_type));
		break;
	}
}
4441
4442 static void goya_print_razwi_info(struct hl_device *hdev)
4443 {
4444 if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
4445 dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n");
4446 WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
4447 }
4448
4449 if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
4450 dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n");
4451 WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
4452 }
4453
4454 if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
4455 dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n");
4456 WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
4457 }
4458
4459 if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
4460 dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n");
4461 WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
4462 }
4463 }
4464
4465 static void goya_print_mmu_error_info(struct hl_device *hdev)
4466 {
4467 struct goya_device *goya = hdev->asic_specific;
4468 u64 addr;
4469 u32 val;
4470
4471 if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4472 return;
4473
4474 val = RREG32(mmMMU_PAGE_ERROR_CAPTURE);
4475 if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
4476 addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
4477 addr <<= 32;
4478 addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);
4479
4480 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
4481 addr);
4482
4483 WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
4484 }
4485 }
4486
4487 static void goya_print_out_of_sync_info(struct hl_device *hdev,
4488 struct cpucp_pkt_sync_err *sync_err)
4489 {
4490 struct hl_hw_queue *q = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
4491
4492 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
4493 sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
4494 }
4495
4496 static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
4497 bool razwi)
4498 {
4499 char desc[20] = "";
4500
4501 goya_get_event_desc(event_type, desc, sizeof(desc));
4502 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
4503 event_type, desc);
4504
4505 if (razwi) {
4506 goya_print_razwi_info(hdev);
4507 goya_print_mmu_error_info(hdev);
4508 }
4509 }
4510
/*
 * goya_unmask_irq_arr() - ask the CPU-CP firmware to unmask a set of IRQs.
 * @hdev: habanalabs device structure.
 * @irq_arr: array of GOYA_ASYNC_EVENT_ID_* values, in CPU endianness.
 * @irq_arr_size: size of @irq_arr in bytes.
 *
 * Builds a variable-length CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY packet with
 * the ids converted to little-endian, and sends it to the firmware.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
				size_t irq_arr_size)
{
	struct cpucp_unmask_irq_arr_packet *pkt;
	size_t total_pkt_size;
	u64 result;
	int rc;
	int irq_num_entries, irq_arr_index;
	__le32 *goya_irq_arr;

	total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
			irq_arr_size;

	/* round up to an 8-byte boundary */
	total_pkt_size = (total_pkt_size + 0x7) & ~0x7;

	/* NOTE(review): presumably the packet size travels in a 16-bit
	 * field, hence the USHRT_MAX bound - confirm against cpucp_if.h
	 */
	if (total_pkt_size > USHRT_MAX) {
		dev_err(hdev->dev, "too many elements in IRQ array\n");
		return -EINVAL;
	}

	pkt = kzalloc(total_pkt_size, GFP_KERNEL);
	if (!pkt)
		return -ENOMEM;

	irq_num_entries = irq_arr_size / sizeof(irq_arr[0]);
	pkt->length = cpu_to_le32(irq_num_entries);

	/* the firmware expects little-endian event ids; convert each
	 * entry individually into the packet's payload
	 */
	for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs;
			irq_arr_index < irq_num_entries ; irq_arr_index++)
		goya_irq_arr[irq_arr_index] =
				cpu_to_le32(irq_arr[irq_arr_index]);

	pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
						CPUCP_PKT_CTL_OPCODE_SHIFT);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
						total_pkt_size,	0, &result);

	if (rc)
		dev_err(hdev->dev, "failed to unmask IRQ array\n");

	kfree(pkt);

	return rc;
}
4561
/*
 * goya_non_hard_reset_late_init() - late init after a non-hard reset.
 *
 * Unmask all async events in one firmware transaction so event delivery
 * resumes after the reset.  goya_all_events is defined elsewhere in this
 * file.
 */
static int goya_non_hard_reset_late_init(struct hl_device *hdev)
{
	return goya_unmask_irq_arr(hdev, goya_all_events,
					sizeof(goya_all_events));
}
4571
4572 static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
4573 {
4574 struct cpucp_packet pkt;
4575 u64 result;
4576 int rc;
4577
4578 memset(&pkt, 0, sizeof(pkt));
4579
4580 pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
4581 CPUCP_PKT_CTL_OPCODE_SHIFT);
4582 pkt.value = cpu_to_le64(event_type);
4583
4584 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
4585 0, &result);
4586
4587 if (rc)
4588 dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
4589
4590 return rc;
4591 }
4592
/*
 * goya_print_clk_change_info() - track and log clock-throttling transitions.
 * @hdev: habanalabs device structure.
 * @event_type: one of the four FIX_POWER/THERMAL_ENV start/end events.
 *
 * Updates current/aggregated throttle reason bits and the per-type start/end
 * timestamps under clk_throttling.lock.  An "_S" event opens an interval
 * (end cleared to zero); the matching "_E" event closes it.
 */
static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
{
	ktime_t zero_time = ktime_set(0, 0);

	mutex_lock(&hdev->clk_throttling.lock);

	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;

	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
		dev_info_ratelimited(hdev->dev,
			"Power envelop is safe, back to optimal clock\n");
		break;

	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;

	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
		dev_info_ratelimited(hdev->dev,
			"Thermal envelop is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}

	mutex_unlock(&hdev->clk_throttling.lock);
}
4640
/*
 * goya_handle_eqe() - dispatch one event-queue entry from the firmware.
 * @hdev: habanalabs device structure.
 * @eq_entry: the raw event queue entry.
 *
 * Extracts the event type from the entry header, updates the per-event
 * statistics, then handles the event: fatal errors trigger a hard reset
 * (when enabled), recoverable errors are logged and their IRQ re-armed via
 * the firmware, and clock-throttle events update the throttling state.
 */
void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
				>> EQ_CTL_EVENT_TYPE_SHIFT);
	struct goya_device *goya = hdev->asic_specific;

	/* guard the events_stat arrays against a corrupt/unknown id */
	if (event_type >= GOYA_ASYNC_EVENT_ID_SIZE) {
		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
				event_type, GOYA_ASYNC_EVENT_ID_SIZE - 1);
		return;
	}

	goya->events_stat[event_type]++;
	goya->events_stat_aggregate[event_type]++;

	switch (event_type) {
	/* unrecoverable errors: log and hard-reset (if allowed) */
	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
	case GOYA_ASYNC_EVENT_ID_MME_ECC:
	case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
	case GOYA_ASYNC_EVENT_ID_MMU_ECC:
	case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
	case GOYA_ASYNC_EVENT_ID_DMA_ECC:
	case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
	case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
	case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
	case GOYA_ASYNC_EVENT_ID_GIC500:
	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
		goya_print_irq_info(hdev, event_type, false);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, (HL_DRV_RESET_HARD |
						HL_DRV_RESET_FW_FATAL_ERR));
		break;

	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
		goya_print_irq_info(hdev, event_type, false);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, HL_DRV_RESET_HARD);
		break;

	/* recoverable errors that may carry RAZWI/MMU capture info:
	 * log (with the captures) and re-arm the IRQ in firmware
	 */
	case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
	case GOYA_ASYNC_EVENT_ID_MME_WACS:
	case GOYA_ASYNC_EVENT_ID_MME_WACSD:
	case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
	case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
	case GOYA_ASYNC_EVENT_ID_PSOC:
	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
	case GOYA_ASYNC_EVENT_ID_MME_QM:
	case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
		goya_print_irq_info(hdev, event_type, true);
		goya_unmask_irq(hdev, event_type);
		break;

	/* informational events: log (no capture dump) and re-arm */
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		goya_print_irq_info(hdev, event_type, false);
		goya_unmask_irq(hdev, event_type);
		break;

	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		goya_print_clk_change_info(hdev, event_type);
		goya_unmask_irq(hdev, event_type);
		break;

	case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
		goya_print_irq_info(hdev, event_type, false);
		goya_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, HL_DRV_RESET_HARD);
		else
			hl_fw_unmask_irq(hdev, event_type);
		break;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
				event_type);
		break;
	}
}
4760
4761 void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
4762 {
4763 struct goya_device *goya = hdev->asic_specific;
4764
4765 if (aggregate) {
4766 *size = (u32) sizeof(goya->events_stat_aggregate);
4767 return goya->events_stat_aggregate;
4768 }
4769
4770 *size = (u32) sizeof(goya->events_stat);
4771 return goya->events_stat;
4772 }
4773
4774 static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
4775 u64 val, bool is_dram)
4776 {
4777 struct packet_lin_dma *lin_dma_pkt;
4778 struct hl_cs_job *job;
4779 u32 cb_size, ctl;
4780 struct hl_cb *cb;
4781 int rc, lin_dma_pkts_cnt;
4782
4783 lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
4784 cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
4785 sizeof(struct packet_msg_prot);
4786 cb = hl_cb_kernel_create(hdev, cb_size, false);
4787 if (!cb)
4788 return -ENOMEM;
4789
4790 lin_dma_pkt = cb->kernel_address;
4791
4792 do {
4793 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4794
4795 ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
4796 (1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
4797 (1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
4798 (1 << GOYA_PKT_CTL_RB_SHIFT) |
4799 (1 << GOYA_PKT_CTL_MB_SHIFT));
4800 ctl |= (is_dram ? HL_DMA_HOST_TO_DRAM : HL_DMA_HOST_TO_SRAM) <<
4801 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
4802 lin_dma_pkt->ctl = cpu_to_le32(ctl);
4803
4804 lin_dma_pkt->src_addr = cpu_to_le64(val);
4805 lin_dma_pkt->dst_addr = cpu_to_le64(addr);
4806 if (lin_dma_pkts_cnt > 1)
4807 lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
4808 else
4809 lin_dma_pkt->tsize = cpu_to_le32(size);
4810
4811 size -= SZ_2G;
4812 addr += SZ_2G;
4813 lin_dma_pkt++;
4814 } while (--lin_dma_pkts_cnt);
4815
4816 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4817 if (!job) {
4818 dev_err(hdev->dev, "Failed to allocate a new job\n");
4819 rc = -ENOMEM;
4820 goto release_cb;
4821 }
4822
4823 job->id = 0;
4824 job->user_cb = cb;
4825 atomic_inc(&job->user_cb->cs_cnt);
4826 job->user_cb_size = cb_size;
4827 job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
4828 job->patched_cb = job->user_cb;
4829 job->job_cb_size = job->user_cb_size;
4830
4831 hl_debugfs_add_job(hdev, job);
4832
4833 rc = goya_send_job_on_qman0(hdev, job);
4834
4835 hl_debugfs_remove_job(hdev, job);
4836 kfree(job);
4837 atomic_dec(&cb->cs_cnt);
4838
4839 release_cb:
4840 hl_cb_put(cb);
4841 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
4842
4843 return rc;
4844 }
4845
/*
 * goya_context_switch() - prepare the device for a new user context.
 * @hdev: habanalabs device structure.
 * @asid: ASID of the incoming context (unused here; the MMU programming is
 *        done separately in goya_mmu_prepare()).
 *
 * Scrubs the SRAM with a fixed pattern (a much smaller range on the PLDM
 * simulator, where the full scrub would be too slow), restores the DMA
 * write-completion SOB addresses to their defaults, and clears the sync
 * manager registers.
 *
 * Return: 0 on success, negative errno if the SRAM scrub failed.
 */
int goya_context_switch(struct hl_device *hdev, u32 asid)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 addr = prop->sram_base_address, sob_addr;
	u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
	u64 val = 0x7777777777777777ull;	/* SRAM scrub pattern */
	int rc, dma_id;
	u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
			mmDMA_CH_0_WR_COMP_ADDR_LO;

	rc = goya_memset_device_memory(hdev, addr, size, val, false);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
		return rc;
	}

	/* channel 0 completes to SOB 1007, channels 1..4 to SOB 1000+i-1 -
	 * this restores the default wiring used by the external queues
	 */
	sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
	WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));

	for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
							(dma_id - 1) * 4;
		WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
				lower_32_bits(sob_addr));
	}

	/* NOTE(review): magic value restoring the TPC PLL relax config -
	 * meaning not derivable from this file, confirm against HW spec
	 */
	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);

	goya_clear_sm_regs(hdev);

	return 0;
}
4879
4880 static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
4881 {
4882 struct asic_fixed_properties *prop = &hdev->asic_prop;
4883 struct goya_device *goya = hdev->asic_specific;
4884 u64 addr = prop->mmu_pgt_addr;
4885 u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
4886 MMU_CACHE_MNG_SIZE;
4887
4888 if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4889 return 0;
4890
4891 return goya_memset_device_memory(hdev, addr, size, 0, true);
4892 }
4893
4894 static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
4895 {
4896 struct goya_device *goya = hdev->asic_specific;
4897 u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
4898 u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE;
4899 u64 val = 0x9999999999999999ull;
4900
4901 if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4902 return 0;
4903
4904 return goya_memset_device_memory(hdev, addr, size, val, true);
4905 }
4906
/*
 * goya_mmu_add_mappings_for_device_cpu() - create the kernel-context MMU
 * mappings the embedded CPU needs.
 *
 * Identity-maps the CPU FW image region of DRAM with 2MB pages, then maps
 * the CPU-accessible DMA pool at VA_CPU_ACCESSIBLE_MEM_ADDR - with a single
 * 2MB page when the pool happens to be 2MB-aligned, otherwise with 4KB
 * pages.  Finally programs the CPU interface to use the kernel ASID.
 *
 * On failure, every page mapped so far is unmapped (off/cpu_off are
 * decremented before the goto so the unwind loops stop at the last page
 * that was successfully mapped).
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	s64 off, cpu_off;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	/* identity map of the CPU FW image, 2MB pages; flush only on last */
	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
		rc = hl_mmu_map_page(hdev->kernel_ctx,
				prop->dram_base_address + off,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
		if (rc) {
			dev_err(hdev->dev, "Map failed for address 0x%llx\n",
					prop->dram_base_address + off);
			goto unmap;
		}
	}

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		/* pool is 2MB aligned - one large page suffices */
		rc = hl_mmu_map_page(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR,
				hdev->cpu_accessible_dma_address,
				PAGE_SIZE_2MB, true);

		if (rc) {
			dev_err(hdev->dev,
				"Map failed for CPU accessible memory\n");
			/* FW image is fully mapped; step back so the
			 * unwind loop covers its last page
			 */
			off -= PAGE_SIZE_2MB;
			goto unmap;
		}
	} else {
		/* unaligned pool - map the 2MB range with 4KB pages */
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
			rc = hl_mmu_map_page(hdev->kernel_ctx,
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
					hdev->cpu_accessible_dma_address + cpu_off,
					PAGE_SIZE_4KB, true);
			if (rc) {
				dev_err(hdev->dev,
					"Map failed for CPU accessible memory\n");
				cpu_off -= PAGE_SIZE_4KB;
				goto unmap_cpu;
			}
		}
	}

	/* route CPU interface traffic through the kernel ASID */
	goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
	goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);

	/* read back to flush the posted writes before proceeding */
	RREG32(mmCPU_IF_AWUSER_OVR_EN);

	goya->device_cpu_mmu_mappings_done = true;

	return 0;

unmap_cpu:
	for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
		if (hl_mmu_unmap_page(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				PAGE_SIZE_4KB, true))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
unmap:
	for (; off >= 0 ; off -= PAGE_SIZE_2MB)
		if (hl_mmu_unmap_page(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				true))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				prop->dram_base_address + off);

	return rc;
}
4987
/*
 * goya_mmu_remove_device_cpu_mappings() - tear down the mappings created by
 * goya_mmu_add_mappings_for_device_cpu().
 *
 * Disables the CPU interface ASID overrides first, then unmaps the
 * CPU-accessible pool (2MB or 4KB pages, mirroring how it was mapped) and
 * the CPU FW image identity mapping.  Unmap failures are only warned about
 * since there is nothing else to do at teardown.
 */
void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u32 off, cpu_off;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (!goya->device_cpu_mmu_mappings_done)
		return;

	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		/* pool was mapped with a single 2MB page */
		if (hl_mmu_unmap_page(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR,
				PAGE_SIZE_2MB, true))
			dev_warn(hdev->dev,
				"Failed to unmap CPU accessible memory\n");
	} else {
		/* pool was mapped with 4KB pages; flush only on the last */
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
			if (hl_mmu_unmap_page(hdev->kernel_ctx,
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
					PAGE_SIZE_4KB,
					(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
				dev_warn_ratelimited(hdev->dev,
					"failed to unmap address 0x%llx\n",
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
	}

	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
		if (hl_mmu_unmap_page(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
			dev_warn_ratelimited(hdev->dev,
					"Failed to unmap address 0x%llx\n",
					prop->dram_base_address + off);

	goya->device_cpu_mmu_mappings_done = false;
}
5030
5031 static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
5032 {
5033 struct goya_device *goya = hdev->asic_specific;
5034 int i;
5035
5036 if (!(goya->hw_cap_initialized & HW_CAP_MMU))
5037 return;
5038
5039 if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
5040 dev_crit(hdev->dev, "asid %u is too big\n", asid);
5041 return;
5042 }
5043
5044
5045 for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
5046 goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
5047 }
5048
/*
 * goya_mmu_invalidate_cache() - invalidate the whole MMU (STLB) cache.
 * @hdev: habanalabs device structure.
 * @is_hard: only a "hard" (full) invalidation is supported; soft requests
 *           return immediately.
 * @flags: unused on Goya.
 *
 * Kicks the full-invalidation state machine and polls until the HW clears
 * the start bit.  Skipped entirely when the MMU is off or a hard reset is
 * already pending.
 *
 * Return: 0 on success (or when skipped), -ETIMEDOUT style error from the
 * poll otherwise.
 */
static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
					u32 flags)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->reset_info.hard_reset_pending)
		return 0;

	/* no partial invalidation support - soft requests are a no-op */
	if (!is_hard)
		return 0;

	/* the PLDM simulator is far slower than real HW */
	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/* start the invalidation; HW clears this bit when done */
	WREG32(mmSTLB_INV_ALL_START, 1);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_ALL_START,
		status,
		!status,
		1000,
		timeout_usec);

	return rc;
}
5082
/*
 * goya_mmu_invalidate_cache_range() - range-based invalidation entry point.
 *
 * Goya has no per-range invalidation support, so the ASID/VA/size arguments
 * are ignored and the whole cache is invalidated instead.
 */
static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
				bool is_hard, u32 flags,
				u32 asid, u64 va, u64 size)
{
	return hl_mmu_invalidate_cache(hdev, is_hard, flags);
}
5092
5093 int goya_send_heartbeat(struct hl_device *hdev)
5094 {
5095 struct goya_device *goya = hdev->asic_specific;
5096
5097 if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5098 return 0;
5099
5100 return hl_fw_send_heartbeat(hdev);
5101 }
5102
5103 int goya_cpucp_info_get(struct hl_device *hdev)
5104 {
5105 struct goya_device *goya = hdev->asic_specific;
5106 struct asic_fixed_properties *prop = &hdev->asic_prop;
5107 u64 dram_size;
5108 int rc;
5109
5110 if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5111 return 0;
5112
5113 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
5114 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
5115 mmCPU_BOOT_ERR1);
5116 if (rc)
5117 return rc;
5118
5119 dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
5120 if (dram_size) {
5121 if ((!is_power_of_2(dram_size)) ||
5122 (dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
5123 dev_err(hdev->dev,
5124 "F/W reported invalid DRAM size %llu. Trying to use default size\n",
5125 dram_size);
5126 dram_size = DRAM_PHYS_DEFAULT_SIZE;
5127 }
5128
5129 prop->dram_size = dram_size;
5130 prop->dram_end_address = prop->dram_base_address + dram_size;
5131 }
5132
5133 if (!strlen(prop->cpucp_info.card_name))
5134 strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
5135 CARD_NAME_MAX_LEN);
5136
5137 return 0;
5138 }
5139
/*
 * goya_is_device_idle() - check whether all compute engines are idle.
 * @hdev: habanalabs device structure.
 * @mask_arr: optional bitmap; a bit is set for every busy engine.
 * @mask_len: length of @mask_arr (unused in the checks below).
 * @s: optional seq_file; when given, a per-engine status table is printed.
 *
 * Walks the five DMA channels, the eight TPCs and the single MME, reading
 * each engine's status registers and combining them with the IS_*_IDLE
 * macros defined at the top of this file.
 *
 * Return: true only if every engine is idle.
 */
static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
					u8 mask_len, struct seq_file *s)
{
	const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
	const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
	unsigned long *mask = (unsigned long *)mask_arr;
	u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
		mme_arch_sts;
	bool is_idle = true, is_eng_idle;
	u64 offset;
	int i;

	if (s)
		seq_puts(s, "\nDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0\n"
				"---  -------  ------------  -------------\n");

	/* per-channel register stride, derived from two adjacent channels */
	offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;

	for (i = 0 ; i < DMA_MAX_NUM ; i++) {
		qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
		dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
		is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GOYA_ENGINE_ID_DMA_0 + i, mask);
		if (s)
			seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, dma_core_sts0);
	}

	if (s)
		seq_puts(s,
			"\nTPC  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  CFG_STATUS\n"
			"---  -------  ------------  --------------  ----------\n");

	offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;

	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
		cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
		is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
				IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GOYA_ENGINE_ID_TPC_0 + i, mask);
		if (s)
			seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
	}

	if (s)
		seq_puts(s,
			"\nMME  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  ARCH_STATUS\n"
			"---  -------  ------------  --------------  -----------\n");

	/* Goya has a single MME */
	qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
	cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
	mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
	is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
			IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
			IS_MME_IDLE(mme_arch_sts);
	is_idle &= is_eng_idle;

	if (mask && !is_eng_idle)
		set_bit(GOYA_ENGINE_ID_MME_0, mask);
	if (s) {
		seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				cmdq_glbl_sts0, mme_arch_sts);
		seq_puts(s, "\n");
	}

	return is_idle;
}
5218
/* Acquire the HW queues spinlock (sparse-annotated for lock checking). */
static void goya_hw_queues_lock(struct hl_device *hdev)
	__acquires(&goya->hw_queues_lock)
{
	struct goya_device *goya = hdev->asic_specific;

	spin_lock(&goya->hw_queues_lock);
}
5226
/* Release the HW queues spinlock taken by goya_hw_queues_lock(). */
static void goya_hw_queues_unlock(struct hl_device *hdev)
	__releases(&goya->hw_queues_lock)
{
	struct goya_device *goya = hdev->asic_specific;

	spin_unlock(&goya->hw_queues_lock);
}
5234
/* Return the PCI device id of the underlying PCI device. */
static u32 goya_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}
5239
5240 static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
5241 size_t max_size)
5242 {
5243 struct goya_device *goya = hdev->asic_specific;
5244
5245 if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5246 return 0;
5247
5248 return hl_fw_get_eeprom_data(hdev, data, max_size);
5249 }
5250
/* Goya has no DRAM scrambler, so there is nothing to initialize. */
static void goya_cpu_init_scrambler_dram(struct hl_device *hdev)
{

}
5255
5256 static int goya_ctx_init(struct hl_ctx *ctx)
5257 {
5258 if (ctx->asid != HL_KERNEL_ASID_ID)
5259 goya_mmu_prepare(ctx->hdev, ctx->asid);
5260
5261 return 0;
5262 }
5263
/* No per-CS preparation is needed on Goya. */
static int goya_pre_schedule_cs(struct hl_cs *cs)
{
	return 0;
}

/* Goya completion queues map 1:1 to HW queues. */
u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return cq_idx;
}

/* Sync-stream is not supported on Goya - signal CBs have zero size. */
static u32 goya_get_signal_cb_size(struct hl_device *hdev)
{
	return 0;
}

/* Sync-stream is not supported on Goya - wait CBs have zero size. */
static u32 goya_get_wait_cb_size(struct hl_device *hdev)
{
	return 0;
}

/* Sync-stream is not supported on Goya - nothing is generated. */
static u32 goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb)
{
	return 0;
}

/* Sync-stream is not supported on Goya - nothing is generated. */
static u32 goya_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop)
{
	return 0;
}

/* Sync-stream is not supported on Goya - no SOBs to reset. */
static void goya_reset_sob(struct hl_device *hdev, void *data)
{

}

/* Sync-stream is not supported on Goya - no SOB groups to reset. */
static void goya_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{

}
5305
5306 u64 goya_get_device_time(struct hl_device *hdev)
5307 {
5308 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
5309
5310 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
5311 }
5312
/* Collective wait is not supported on Goya - init is a no-op. */
static int goya_collective_wait_init_cs(struct hl_cs *cs)
{
	return 0;
}

/* Collective wait is not supported on Goya - creating jobs is rejected. */
static int goya_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
		u32 collective_engine_id, u32 encaps_signal_offset)
{
	return -EINVAL;
}

/* No per-context teardown is needed on Goya. */
static void goya_ctx_fini(struct hl_ctx *ctx)
{

}

/* HW block mapping to user space is not supported on Goya. */
static int goya_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
			u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

/* HW block mapping to user space is not supported on Goya. */
static int goya_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}
5341
/*
 * goya_enable_events_from_fw() - tell the firmware to start delivering
 * async events, by raising the dedicated GIC interrupt towards it.
 */
static void goya_enable_events_from_fw(struct hl_device *hdev)
{
	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_INTS_REGISTER);
}
5347
/* MMU fault acknowledgment via capability mask is not supported on Goya. */
static int goya_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return -EINVAL;
}
5352
/*
 * goya_map_pll_idx_to_fw_idx() - translate a driver-visible PLL index
 * (HL_GOYA_*) to the index the firmware expects.
 *
 * Return: the firmware PLL index, or -EINVAL for an unknown input.
 */
static int goya_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GOYA_CPU_PLL: return CPU_PLL;
	case HL_GOYA_PCI_PLL: return PCI_PLL;
	case HL_GOYA_MME_PLL: return MME_PLL;
	case HL_GOYA_TPC_PLL: return TPC_PLL;
	case HL_GOYA_IC_PLL: return IC_PLL;
	case HL_GOYA_MC_PLL: return MC_PLL;
	case HL_GOYA_EMMC_PLL: return EMMC_PLL;
	default: return -EINVAL;
	}
}
5366
/* State dump is not implemented for Goya - mapping generation is a no-op. */
static int goya_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	return 0;
}

/* State dump is not implemented for Goya - no monitor is ever valid. */
static int goya_monitor_valid(struct hl_mon_state_dump *mon)
{
	return 0;
}

/* State dump is not implemented for Goya - nothing is printed. */
static int goya_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	return 0;
}

/* State dump is not implemented for Goya - nothing is printed. */
static int goya_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	return 0;
}
5397
5398
/* Stubbed state-dump callbacks (the feature is not implemented on Goya). */
static struct hl_state_dump_specs_funcs goya_state_dump_funcs = {
	.monitor_valid = goya_monitor_valid,
	.print_single_monitor = goya_print_single_monitor,
	.gen_sync_to_engine_map = goya_gen_sync_to_engine_map,
	.print_fences_single_engine = goya_print_fences_single_engine,
};

/* Install the (stub) state-dump properties and callbacks on the device. */
static void goya_state_dump_init(struct hl_device *hdev)
{
	hdev->state_dump_specs.props = goya_state_dump_specs_props;
	hdev->state_dump_specs.funcs = goya_state_dump_funcs;
}
5412
/* Sync-stream is not supported on Goya - SOBs have no address. */
static u32 goya_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return 0;
}

/* No stream-master queues exist on Goya. */
static u32 *goya_get_stream_master_qid_arr(void)
{
	return NULL;
}

/* Monitor dump is not supported on Goya. */
static int goya_get_monitor_dump(struct hl_device *hdev, void *data)
{
	return -EOPNOTSUPP;
}

/* RAZWI checking at this hook is handled elsewhere on Goya - no-op. */
static void goya_check_if_razwi_happened(struct hl_device *hdev)
{
}

/* Dedicated DRAM scrubbing is not supported on Goya. */
static int goya_scrub_device_dram(struct hl_device *hdev, u64 val)
{
	return -EOPNOTSUPP;
}
5436
5437 static const struct hl_asic_funcs goya_funcs = {
5438 .early_init = goya_early_init,
5439 .early_fini = goya_early_fini,
5440 .late_init = goya_late_init,
5441 .late_fini = goya_late_fini,
5442 .sw_init = goya_sw_init,
5443 .sw_fini = goya_sw_fini,
5444 .hw_init = goya_hw_init,
5445 .hw_fini = goya_hw_fini,
5446 .halt_engines = goya_halt_engines,
5447 .suspend = goya_suspend,
5448 .resume = goya_resume,
5449 .mmap = goya_mmap,
5450 .ring_doorbell = goya_ring_doorbell,
5451 .pqe_write = goya_pqe_write,
5452 .asic_dma_alloc_coherent = goya_dma_alloc_coherent,
5453 .asic_dma_free_coherent = goya_dma_free_coherent,
5454 .scrub_device_mem = goya_scrub_device_mem,
5455 .scrub_device_dram = goya_scrub_device_dram,
5456 .get_int_queue_base = goya_get_int_queue_base,
5457 .test_queues = goya_test_queues,
5458 .asic_dma_pool_zalloc = goya_dma_pool_zalloc,
5459 .asic_dma_pool_free = goya_dma_pool_free,
5460 .cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
5461 .cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
5462 .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
5463 .cs_parser = goya_cs_parser,
5464 .asic_dma_map_sgtable = hl_dma_map_sgtable,
5465 .add_end_of_cb_packets = goya_add_end_of_cb_packets,
5466 .update_eq_ci = goya_update_eq_ci,
5467 .context_switch = goya_context_switch,
5468 .restore_phase_topology = goya_restore_phase_topology,
5469 .debugfs_read_dma = goya_debugfs_read_dma,
5470 .add_device_attr = goya_add_device_attr,
5471 .handle_eqe = goya_handle_eqe,
5472 .get_events_stat = goya_get_events_stat,
5473 .read_pte = goya_read_pte,
5474 .write_pte = goya_write_pte,
5475 .mmu_invalidate_cache = goya_mmu_invalidate_cache,
5476 .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
5477 .mmu_prefetch_cache_range = NULL,
5478 .send_heartbeat = goya_send_heartbeat,
5479 .debug_coresight = goya_debug_coresight,
5480 .is_device_idle = goya_is_device_idle,
5481 .non_hard_reset_late_init = goya_non_hard_reset_late_init,
5482 .hw_queues_lock = goya_hw_queues_lock,
5483 .hw_queues_unlock = goya_hw_queues_unlock,
5484 .kdma_lock = NULL,
5485 .kdma_unlock = NULL,
5486 .get_pci_id = goya_get_pci_id,
5487 .get_eeprom_data = goya_get_eeprom_data,
5488 .get_monitor_dump = goya_get_monitor_dump,
5489 .send_cpu_message = goya_send_cpu_message,
5490 .pci_bars_map = goya_pci_bars_map,
5491 .init_iatu = goya_init_iatu,
5492 .rreg = hl_rreg,
5493 .wreg = hl_wreg,
5494 .halt_coresight = goya_halt_coresight,
5495 .ctx_init = goya_ctx_init,
5496 .ctx_fini = goya_ctx_fini,
5497 .pre_schedule_cs = goya_pre_schedule_cs,
5498 .get_queue_id_for_cq = goya_get_queue_id_for_cq,
5499 .load_firmware_to_device = goya_load_firmware_to_device,
5500 .load_boot_fit_to_device = goya_load_boot_fit_to_device,
5501 .get_signal_cb_size = goya_get_signal_cb_size,
5502 .get_wait_cb_size = goya_get_wait_cb_size,
5503 .gen_signal_cb = goya_gen_signal_cb,
5504 .gen_wait_cb = goya_gen_wait_cb,
5505 .reset_sob = goya_reset_sob,
5506 .reset_sob_group = goya_reset_sob_group,
5507 .get_device_time = goya_get_device_time,
5508 .pb_print_security_errors = NULL,
5509 .collective_wait_init_cs = goya_collective_wait_init_cs,
5510 .collective_wait_create_jobs = goya_collective_wait_create_jobs,
5511 .get_dec_base_addr = NULL,
5512 .scramble_addr = hl_mmu_scramble_addr,
5513 .descramble_addr = hl_mmu_descramble_addr,
5514 .ack_protection_bits_errors = goya_ack_protection_bits_errors,
5515 .get_hw_block_id = goya_get_hw_block_id,
5516 .hw_block_mmap = goya_block_mmap,
5517 .enable_events_from_fw = goya_enable_events_from_fw,
5518 .ack_mmu_errors = goya_ack_mmu_page_fault_or_access_error,
5519 .map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx,
5520 .init_firmware_preload_params = goya_init_firmware_preload_params,
5521 .init_firmware_loader = goya_init_firmware_loader,
5522 .init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram,
5523 .state_dump_init = goya_state_dump_init,
5524 .get_sob_addr = &goya_get_sob_addr,
5525 .set_pci_memory_regions = goya_set_pci_memory_regions,
5526 .get_stream_master_qid_arr = goya_get_stream_master_qid_arr,
5527 .check_if_razwi_happened = goya_check_if_razwi_happened,
5528 .mmu_get_real_page_size = hl_mmu_get_real_page_size,
5529 .access_dev_mem = hl_access_dev_mem,
5530 .set_dram_bar_base = goya_set_ddr_bar_base,
5531 };
5532
5533
5534
5535
5536
5537
5538
/*
 * goya_set_asic_funcs() - install the Goya dispatch table on a device.
 * @hdev: habanalabs device structure.
 *
 * After this call every ASIC-abstracted operation on @hdev routes through
 * the goya_funcs table above.  This is the only non-static symbol here,
 * i.e. the entry point the common driver uses to bind to this ASIC.
 */
void goya_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &goya_funcs;
}