/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "amdgpu.h"
#include "nbio/nbio_6_1_offset.h"
#include "nbio/nbio_6_1_sh_mask.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "mp/mp_9_0_offset.h"
#include "soc15.h"
#include "vega10_ih.h"
#include "soc15_common.h"
#include "mxgpu_ai.h"

#include "amdgpu_reset.h"

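/* Acknowledge the currently pending host->VF message by setting RCV_MSG_ACK
 * (bit 1 of the mailbox receive control byte).
 */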
static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev)
{
    WREG8(AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2);
}

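/* Assert or deassert TRN_MSG_VALID (bit 0 of the mailbox transmit control
 * byte) to tell the host whether the transmit buffer holds a valid message.
 */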
static void xgpu_ai_mailbox_set_valid(struct amdgpu_device *adev, bool val)
{
    WREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE, val ? 1 : 0);
}

/*
 * this peek_msg can *only* be called from the IRQ routine, because in the
 * IRQ routine the RCV_MSG_VALID field of BIF_BX_PF0_MAILBOX_CONTROL has
 * already been set to 1 by the host.
 *
 * if not called from the IRQ routine, this peek_msg is not guaranteed to
 * return the correct value, since it reads RCV_DW0 without verifying that
 * RCV_MSG_VALID has been set by the host.
 */
static enum idh_event xgpu_ai_mailbox_peek_msg(struct amdgpu_device *adev)
{
    return RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
                mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW0));
}

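/* Check whether the pending mailbox message matches @event; if so, ack it
 * to the host.  Returns 0 on a match, -ENOENT otherwise.
 */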
static int xgpu_ai_mailbox_rcv_msg(struct amdgpu_device *adev,
                   enum idh_event event)
{
    u32 reg;

    reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
                         mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW0));
    if (reg != event)
        return -ENOENT;

    xgpu_ai_mailbox_send_ack(adev);

    return 0;
}

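/* Non-blocking check of TRN_MSG_ACK (bit 1 of the transmit control byte). */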
static uint8_t xgpu_ai_peek_ack(struct amdgpu_device *adev)
{
    return RREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE) & 2;
}

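/* Busy-wait until the host acknowledges the transmitted message, giving up
 * after AI_MAILBOX_POLL_ACK_TIMEDOUT msec.
 */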
static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
{
    int timeout = AI_MAILBOX_POLL_ACK_TIMEDOUT;
    u8 reg;

    do {
        reg = RREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE);
        if (reg & 2)
            return 0;

        mdelay(5);
        timeout -= 5;
    } while (timeout > 1);

    pr_err("Didn't get TRN_MSG_ACK from pf in %d msec\n", AI_MAILBOX_POLL_ACK_TIMEDOUT);

    return -ETIME;
}

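/* Poll the receive buffer until the expected @event arrives, giving up after
 * AI_MAILBOX_POLL_MSG_TIMEDOUT msec.
 */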
static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
{
    int r, timeout = AI_MAILBOX_POLL_MSG_TIMEDOUT;

    do {
        r = xgpu_ai_mailbox_rcv_msg(adev, event);
        if (!r)
            return 0;

        msleep(10);
        timeout -= 10;
    } while (timeout > 1);

    pr_err("Didn't get msg:%d from pf, error=%d\n", event, r);

    return -ETIME;
}

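/* Send a request to the host: write the request id and up to three data
 * words into the transmit buffer, raise TRN_MSG_VALID, then wait for the
 * host's ack before dropping TRN_MSG_VALID again.
 */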
static void xgpu_ai_mailbox_trans_msg(struct amdgpu_device *adev,
          enum idh_request req, u32 data1, u32 data2, u32 data3)
{
    u32 reg;
    int r;
    uint8_t trn;

    /* IMPORTANT:
     * clear TRN_MSG_VALID first so that hw clears the host's RCV_MSG_ACK,
     * which in turn clears the VF's TRN_MSG_ACK; otherwise the
     * xgpu_ai_poll_ack() below would return immediately on the stale ack.
     */
    do {
        xgpu_ai_mailbox_set_valid(adev, false);
        trn = xgpu_ai_peek_ack(adev);
        if (trn) {
            pr_err("trn=%x ACK should not assert! wait again!\n", trn);
            msleep(1);
        }
    } while (trn);

    reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
                         mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0));
    reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0,
                MSGBUF_DATA, req);
    WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0),
              reg);
    WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW1),
                data1);
    WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW2),
                data2);
    WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW3),
                data3);

    xgpu_ai_mailbox_set_valid(adev, true);

    /* start to poll ack */
    r = xgpu_ai_poll_ack(adev);
    if (r)
        pr_err("Didn't get ack from pf, continuing anyway\n");

    xgpu_ai_mailbox_set_valid(adev, false);
}

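/* Transmit @req to the host and, for the access/reset requests, poll for the
 * host's READY_TO_ACCESS_GPU reply (plus the fw checksum in receive dword 2).
 */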
static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
                    enum idh_request req)
{
    int r;

    xgpu_ai_mailbox_trans_msg(adev, req, 0, 0, 0);

    /* check for the host's reply for init/fini/reset access requests */
    if (req == IDH_REQ_GPU_INIT_ACCESS ||
        req == IDH_REQ_GPU_FINI_ACCESS ||
        req == IDH_REQ_GPU_RESET_ACCESS) {
        r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
        if (r) {
            pr_err("Didn't get READY_TO_ACCESS_GPU from pf, giving up\n");
            return r;
        }
        /* Retrieve checksum from mailbox2 */
        if (req == IDH_REQ_GPU_INIT_ACCESS || req == IDH_REQ_GPU_RESET_ACCESS) {
            adev->virt.fw_reserve.checksum_key =
                RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
                    mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW2));
        }
    } else if (req == IDH_REQ_GPU_INIT_DATA) {
        /* Dummy REQ_GPU_INIT_DATA handling */
        r = xgpu_ai_poll_msg(adev, IDH_REQ_GPU_INIT_DATA_READY);
        /* version set to 0 since dummy */
        adev->virt.req_init_data_ver = 0;
    }

    return 0;
}

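/* Ask the host to reset the GPU, retrying the request up to
 * AI_MAILBOX_POLL_MSG_REP_MAX times.
 */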
static int xgpu_ai_request_reset(struct amdgpu_device *adev)
{
    int ret, i = 0;

    while (i < AI_MAILBOX_POLL_MSG_REP_MAX) {
        ret = xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
        if (!ret)
            break;
        i++;
    }

    return ret;
}

static int xgpu_ai_request_full_gpu_access(struct amdgpu_device *adev,
                       bool init)
{
    enum idh_request req;

    req = init ? IDH_REQ_GPU_INIT_ACCESS : IDH_REQ_GPU_FINI_ACCESS;
    return xgpu_ai_send_access_requests(adev, req);
}

static int xgpu_ai_release_full_gpu_access(struct amdgpu_device *adev,
                       bool init)
{
    enum idh_request req;
    int r = 0;

    req = init ? IDH_REL_GPU_INIT_ACCESS : IDH_REL_GPU_FINI_ACCESS;
    r = xgpu_ai_send_access_requests(adev, req);

    return r;
}

static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev,
                    struct amdgpu_irq_src *source,
                    struct amdgpu_iv_entry *entry)
{
    DRM_DEBUG("got ack intr, nothing to do.\n");
    return 0;
}

static int xgpu_ai_set_mailbox_ack_irq(struct amdgpu_device *adev,
                    struct amdgpu_irq_src *source,
                    unsigned type,
                    enum amdgpu_interrupt_state state)
{
    u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL));

    tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_MAILBOX_INT_CNTL, ACK_INT_EN,
                (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
    WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL), tmp);

    return 0;
}

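/* Worker scheduled from the rcv IRQ on IDH_FLR_NOTIFICATION: tell the host
 * we are ready for the VF FLR, wait for FLR COMPLETE, then kick off GPU
 * recovery if needed.
 */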
static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
{
    struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
    struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
    int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;

    /* block amdgpu_gpu_recover till the FLR COMPLETE msg is received,
     * otherwise the mailbox msg will be clobbered/reset by
     * the VF FLR.
     */
    if (atomic_cmpxchg(&adev->reset_domain->in_gpu_reset, 0, 1) != 0)
        return;

    down_write(&adev->reset_domain->sem);

    amdgpu_virt_fini_data_exchange(adev);

    xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);

    do {
        if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
            goto flr_done;

        msleep(10);
        timeout -= 10;
    } while (timeout > 1);

flr_done:
    atomic_set(&adev->reset_domain->in_gpu_reset, 0);
    up_write(&adev->reset_domain->sem);

    /* Trigger recovery for world switch failure if no TDR */
    if (amdgpu_device_should_recover_gpu(adev)
        && (!amdgpu_device_has_job_running(adev) ||
            adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) {
        struct amdgpu_reset_context reset_context;
        memset(&reset_context, 0, sizeof(reset_context));

        reset_context.method = AMD_RESET_METHOD_NONE;
        reset_context.reset_req_dev = adev;
        clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

        amdgpu_device_gpu_recover(adev, NULL, &reset_context);
    }
}

static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
                       struct amdgpu_irq_src *src,
                       unsigned type,
                       enum amdgpu_interrupt_state state)
{
    u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL));

    tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_MAILBOX_INT_CNTL, VALID_INT_EN,
                (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
    WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL), tmp);

    return 0;
}

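/* IRQ handler for host->VF mailbox messages: schedule the FLR worker on a
 * FLR notification, ack liveness queries, and ignore everything else.
 */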
static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
                   struct amdgpu_irq_src *source,
                   struct amdgpu_iv_entry *entry)
{
    enum idh_event event = xgpu_ai_mailbox_peek_msg(adev);

    switch (event) {
    case IDH_FLR_NOTIFICATION:
        if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev))
            WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
                                &adev->virt.flr_work),
                  "Failed to queue work! at %s",
                  __func__);
        break;
    case IDH_QUERY_ALIVE:
        xgpu_ai_mailbox_send_ack(adev);
        break;
    /* READY_TO_ACCESS_GPU is fetched by the kernel's polling thread, so the
     * IRQ handler can safely ignore it here; other msgs such as flr complete
     * are not handled here either.
     */
    case IDH_CLR_MSG_BUF:
    case IDH_FLR_NOTIFICATION_CMPL:
    case IDH_READY_TO_ACCESS_GPU:
    default:
        break;
    }

    return 0;
}

static const struct amdgpu_irq_src_funcs xgpu_ai_mailbox_ack_irq_funcs = {
    .set = xgpu_ai_set_mailbox_ack_irq,
    .process = xgpu_ai_mailbox_ack_irq,
};

static const struct amdgpu_irq_src_funcs xgpu_ai_mailbox_rcv_irq_funcs = {
    .set = xgpu_ai_set_mailbox_rcv_irq,
    .process = xgpu_ai_mailbox_rcv_irq,
};

void xgpu_ai_mailbox_set_irq_funcs(struct amdgpu_device *adev)
{
    adev->virt.ack_irq.num_types = 1;
    adev->virt.ack_irq.funcs = &xgpu_ai_mailbox_ack_irq_funcs;
    adev->virt.rcv_irq.num_types = 1;
    adev->virt.rcv_irq.funcs = &xgpu_ai_mailbox_rcv_irq_funcs;
}

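/* Register the two BIF mailbox interrupt sources: src id 135 feeds rcv_irq
 * (message valid) and src id 138 feeds ack_irq (transmit ack).
 */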
int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev)
{
    int r;

    r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
    if (r)
        return r;

    r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
    if (r) {
        amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
        return r;
    }

    return 0;
}

int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev)
{
    int r;

    r = amdgpu_irq_get(adev, &adev->virt.rcv_irq, 0);
    if (r)
        return r;
    r = amdgpu_irq_get(adev, &adev->virt.ack_irq, 0);
    if (r) {
        amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
        return r;
    }

    INIT_WORK(&adev->virt.flr_work, xgpu_ai_mailbox_flr_work);

    return 0;
}

void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev)
{
    amdgpu_irq_put(adev, &adev->virt.ack_irq, 0);
    amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
}

static int xgpu_ai_request_init_data(struct amdgpu_device *adev)
{
    return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA);
}

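/* Mailbox-backed virtualization ops used by the amdgpu virt layer for
 * SR-IOV VFs on this ASIC family.
 */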
const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
    .req_full_gpu   = xgpu_ai_request_full_gpu_access,
    .rel_full_gpu   = xgpu_ai_release_full_gpu_access,
    .reset_gpu = xgpu_ai_request_reset,
    .wait_reset = NULL,
    .trans_msg = xgpu_ai_mailbox_trans_msg,
    .req_init_data  = xgpu_ai_request_init_data,
};