Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Copyright 2011 Advanced Micro Devices, Inc.
0003  *
0004  * Permission is hereby granted, free of charge, to any person obtaining a
0005  * copy of this software and associated documentation files (the "Software"),
0006  * to deal in the Software without restriction, including without limitation
0007  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
0008  * and/or sell copies of the Software, and to permit persons to whom the
0009  * Software is furnished to do so, subject to the following conditions:
0010  *
0011  * The above copyright notice and this permission notice shall be included in
0012  * all copies or substantial portions of the Software.
0013  *
0014  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0015  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0016  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
0017  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
0018  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
0019  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
0020  * OTHER DEALINGS IN THE SOFTWARE.
0021  *
0022  * Authors: Alex Deucher
0023  */
0024 
0025 #include <linux/firmware.h>
0026 #include <linux/module.h>
0027 #include <linux/pci.h>
0028 #include <linux/slab.h>
0029 
0030 #include <drm/drm_vblank.h>
0031 #include <drm/radeon_drm.h>
0032 
0033 #include "atom.h"
0034 #include "clearstate_si.h"
0035 #include "evergreen.h"
0036 #include "r600.h"
0037 #include "radeon.h"
0038 #include "radeon_asic.h"
0039 #include "radeon_audio.h"
0040 #include "radeon_ucode.h"
0041 #include "si_blit_shaders.h"
0042 #include "si.h"
0043 #include "sid.h"
0044 
0045 
0046 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
0047 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
0048 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
0049 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
0050 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
0051 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
0052 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
0053 
0054 MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
0055 MODULE_FIRMWARE("radeon/tahiti_me.bin");
0056 MODULE_FIRMWARE("radeon/tahiti_ce.bin");
0057 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
0058 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
0059 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
0060 
0061 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
0062 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
0063 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
0064 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
0065 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
0066 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
0067 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
0068 
0069 MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
0070 MODULE_FIRMWARE("radeon/pitcairn_me.bin");
0071 MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
0072 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
0073 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
0074 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
0075 MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
0076 
0077 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
0078 MODULE_FIRMWARE("radeon/VERDE_me.bin");
0079 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
0080 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
0081 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
0082 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
0083 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
0084 
0085 MODULE_FIRMWARE("radeon/verde_pfp.bin");
0086 MODULE_FIRMWARE("radeon/verde_me.bin");
0087 MODULE_FIRMWARE("radeon/verde_ce.bin");
0088 MODULE_FIRMWARE("radeon/verde_mc.bin");
0089 MODULE_FIRMWARE("radeon/verde_rlc.bin");
0090 MODULE_FIRMWARE("radeon/verde_smc.bin");
0091 MODULE_FIRMWARE("radeon/verde_k_smc.bin");
0092 
0093 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
0094 MODULE_FIRMWARE("radeon/OLAND_me.bin");
0095 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
0096 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
0097 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
0098 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
0099 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
0100 
0101 MODULE_FIRMWARE("radeon/oland_pfp.bin");
0102 MODULE_FIRMWARE("radeon/oland_me.bin");
0103 MODULE_FIRMWARE("radeon/oland_ce.bin");
0104 MODULE_FIRMWARE("radeon/oland_mc.bin");
0105 MODULE_FIRMWARE("radeon/oland_rlc.bin");
0106 MODULE_FIRMWARE("radeon/oland_smc.bin");
0107 MODULE_FIRMWARE("radeon/oland_k_smc.bin");
0108 
0109 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
0110 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
0111 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
0112 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
0113 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
0114 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
0115 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
0116 
0117 MODULE_FIRMWARE("radeon/hainan_pfp.bin");
0118 MODULE_FIRMWARE("radeon/hainan_me.bin");
0119 MODULE_FIRMWARE("radeon/hainan_ce.bin");
0120 MODULE_FIRMWARE("radeon/hainan_mc.bin");
0121 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
0122 MODULE_FIRMWARE("radeon/hainan_smc.bin");
0123 MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
0124 MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");
0125 
0126 MODULE_FIRMWARE("radeon/si58_mc.bin");
0127 
0128 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
0129 static void si_pcie_gen3_enable(struct radeon_device *rdev);
0130 static void si_program_aspm(struct radeon_device *rdev);
0131 extern void sumo_rlc_fini(struct radeon_device *rdev);
0132 extern int sumo_rlc_init(struct radeon_device *rdev);
0133 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
0134                      bool enable);
0135 static void si_init_pg(struct radeon_device *rdev);
0136 static void si_init_cg(struct radeon_device *rdev);
0137 static void si_fini_pg(struct radeon_device *rdev);
0138 static void si_fini_cg(struct radeon_device *rdev);
0139 static void si_rlc_stop(struct radeon_device *rdev);
0140 
0141 static const u32 crtc_offsets[] =
0142 {
0143     EVERGREEN_CRTC0_REGISTER_OFFSET,
0144     EVERGREEN_CRTC1_REGISTER_OFFSET,
0145     EVERGREEN_CRTC2_REGISTER_OFFSET,
0146     EVERGREEN_CRTC3_REGISTER_OFFSET,
0147     EVERGREEN_CRTC4_REGISTER_OFFSET,
0148     EVERGREEN_CRTC5_REGISTER_OFFSET
0149 };
0150 
0151 static const u32 si_disp_int_status[] =
0152 {
0153     DISP_INTERRUPT_STATUS,
0154     DISP_INTERRUPT_STATUS_CONTINUE,
0155     DISP_INTERRUPT_STATUS_CONTINUE2,
0156     DISP_INTERRUPT_STATUS_CONTINUE3,
0157     DISP_INTERRUPT_STATUS_CONTINUE4,
0158     DISP_INTERRUPT_STATUS_CONTINUE5
0159 };
0160 
/*
 * Indexed variants of the DC_HPD1_* register constants: each expands to the
 * corresponding DC_HPD1_* constant plus x * 0xc, so x == 0 yields the
 * DC_HPD1_* register itself.
 *
 * The argument is parenthesised so that an expression argument such as
 * "i + 1" is scaled as a whole instead of expanding to "i + 1 * 0xc".
 */
#define DC_HPDx_CONTROL(x)        (DC_HPD1_CONTROL     + ((x) * 0xc))
#define DC_HPDx_INT_CONTROL(x)    (DC_HPD1_INT_CONTROL + ((x) * 0xc))
#define DC_HPDx_INT_STATUS_REG(x) (DC_HPD1_INT_STATUS  + ((x) * 0xc))
0164 
0165 static const u32 verde_rlc_save_restore_register_list[] =
0166 {
0167     (0x8000 << 16) | (0x98f4 >> 2),
0168     0x00000000,
0169     (0x8040 << 16) | (0x98f4 >> 2),
0170     0x00000000,
0171     (0x8000 << 16) | (0xe80 >> 2),
0172     0x00000000,
0173     (0x8040 << 16) | (0xe80 >> 2),
0174     0x00000000,
0175     (0x8000 << 16) | (0x89bc >> 2),
0176     0x00000000,
0177     (0x8040 << 16) | (0x89bc >> 2),
0178     0x00000000,
0179     (0x8000 << 16) | (0x8c1c >> 2),
0180     0x00000000,
0181     (0x8040 << 16) | (0x8c1c >> 2),
0182     0x00000000,
0183     (0x9c00 << 16) | (0x98f0 >> 2),
0184     0x00000000,
0185     (0x9c00 << 16) | (0xe7c >> 2),
0186     0x00000000,
0187     (0x8000 << 16) | (0x9148 >> 2),
0188     0x00000000,
0189     (0x8040 << 16) | (0x9148 >> 2),
0190     0x00000000,
0191     (0x9c00 << 16) | (0x9150 >> 2),
0192     0x00000000,
0193     (0x9c00 << 16) | (0x897c >> 2),
0194     0x00000000,
0195     (0x9c00 << 16) | (0x8d8c >> 2),
0196     0x00000000,
0197     (0x9c00 << 16) | (0xac54 >> 2),
0198     0X00000000,
0199     0x3,
0200     (0x9c00 << 16) | (0x98f8 >> 2),
0201     0x00000000,
0202     (0x9c00 << 16) | (0x9910 >> 2),
0203     0x00000000,
0204     (0x9c00 << 16) | (0x9914 >> 2),
0205     0x00000000,
0206     (0x9c00 << 16) | (0x9918 >> 2),
0207     0x00000000,
0208     (0x9c00 << 16) | (0x991c >> 2),
0209     0x00000000,
0210     (0x9c00 << 16) | (0x9920 >> 2),
0211     0x00000000,
0212     (0x9c00 << 16) | (0x9924 >> 2),
0213     0x00000000,
0214     (0x9c00 << 16) | (0x9928 >> 2),
0215     0x00000000,
0216     (0x9c00 << 16) | (0x992c >> 2),
0217     0x00000000,
0218     (0x9c00 << 16) | (0x9930 >> 2),
0219     0x00000000,
0220     (0x9c00 << 16) | (0x9934 >> 2),
0221     0x00000000,
0222     (0x9c00 << 16) | (0x9938 >> 2),
0223     0x00000000,
0224     (0x9c00 << 16) | (0x993c >> 2),
0225     0x00000000,
0226     (0x9c00 << 16) | (0x9940 >> 2),
0227     0x00000000,
0228     (0x9c00 << 16) | (0x9944 >> 2),
0229     0x00000000,
0230     (0x9c00 << 16) | (0x9948 >> 2),
0231     0x00000000,
0232     (0x9c00 << 16) | (0x994c >> 2),
0233     0x00000000,
0234     (0x9c00 << 16) | (0x9950 >> 2),
0235     0x00000000,
0236     (0x9c00 << 16) | (0x9954 >> 2),
0237     0x00000000,
0238     (0x9c00 << 16) | (0x9958 >> 2),
0239     0x00000000,
0240     (0x9c00 << 16) | (0x995c >> 2),
0241     0x00000000,
0242     (0x9c00 << 16) | (0x9960 >> 2),
0243     0x00000000,
0244     (0x9c00 << 16) | (0x9964 >> 2),
0245     0x00000000,
0246     (0x9c00 << 16) | (0x9968 >> 2),
0247     0x00000000,
0248     (0x9c00 << 16) | (0x996c >> 2),
0249     0x00000000,
0250     (0x9c00 << 16) | (0x9970 >> 2),
0251     0x00000000,
0252     (0x9c00 << 16) | (0x9974 >> 2),
0253     0x00000000,
0254     (0x9c00 << 16) | (0x9978 >> 2),
0255     0x00000000,
0256     (0x9c00 << 16) | (0x997c >> 2),
0257     0x00000000,
0258     (0x9c00 << 16) | (0x9980 >> 2),
0259     0x00000000,
0260     (0x9c00 << 16) | (0x9984 >> 2),
0261     0x00000000,
0262     (0x9c00 << 16) | (0x9988 >> 2),
0263     0x00000000,
0264     (0x9c00 << 16) | (0x998c >> 2),
0265     0x00000000,
0266     (0x9c00 << 16) | (0x8c00 >> 2),
0267     0x00000000,
0268     (0x9c00 << 16) | (0x8c14 >> 2),
0269     0x00000000,
0270     (0x9c00 << 16) | (0x8c04 >> 2),
0271     0x00000000,
0272     (0x9c00 << 16) | (0x8c08 >> 2),
0273     0x00000000,
0274     (0x8000 << 16) | (0x9b7c >> 2),
0275     0x00000000,
0276     (0x8040 << 16) | (0x9b7c >> 2),
0277     0x00000000,
0278     (0x8000 << 16) | (0xe84 >> 2),
0279     0x00000000,
0280     (0x8040 << 16) | (0xe84 >> 2),
0281     0x00000000,
0282     (0x8000 << 16) | (0x89c0 >> 2),
0283     0x00000000,
0284     (0x8040 << 16) | (0x89c0 >> 2),
0285     0x00000000,
0286     (0x8000 << 16) | (0x914c >> 2),
0287     0x00000000,
0288     (0x8040 << 16) | (0x914c >> 2),
0289     0x00000000,
0290     (0x8000 << 16) | (0x8c20 >> 2),
0291     0x00000000,
0292     (0x8040 << 16) | (0x8c20 >> 2),
0293     0x00000000,
0294     (0x8000 << 16) | (0x9354 >> 2),
0295     0x00000000,
0296     (0x8040 << 16) | (0x9354 >> 2),
0297     0x00000000,
0298     (0x9c00 << 16) | (0x9060 >> 2),
0299     0x00000000,
0300     (0x9c00 << 16) | (0x9364 >> 2),
0301     0x00000000,
0302     (0x9c00 << 16) | (0x9100 >> 2),
0303     0x00000000,
0304     (0x9c00 << 16) | (0x913c >> 2),
0305     0x00000000,
0306     (0x8000 << 16) | (0x90e0 >> 2),
0307     0x00000000,
0308     (0x8000 << 16) | (0x90e4 >> 2),
0309     0x00000000,
0310     (0x8000 << 16) | (0x90e8 >> 2),
0311     0x00000000,
0312     (0x8040 << 16) | (0x90e0 >> 2),
0313     0x00000000,
0314     (0x8040 << 16) | (0x90e4 >> 2),
0315     0x00000000,
0316     (0x8040 << 16) | (0x90e8 >> 2),
0317     0x00000000,
0318     (0x9c00 << 16) | (0x8bcc >> 2),
0319     0x00000000,
0320     (0x9c00 << 16) | (0x8b24 >> 2),
0321     0x00000000,
0322     (0x9c00 << 16) | (0x88c4 >> 2),
0323     0x00000000,
0324     (0x9c00 << 16) | (0x8e50 >> 2),
0325     0x00000000,
0326     (0x9c00 << 16) | (0x8c0c >> 2),
0327     0x00000000,
0328     (0x9c00 << 16) | (0x8e58 >> 2),
0329     0x00000000,
0330     (0x9c00 << 16) | (0x8e5c >> 2),
0331     0x00000000,
0332     (0x9c00 << 16) | (0x9508 >> 2),
0333     0x00000000,
0334     (0x9c00 << 16) | (0x950c >> 2),
0335     0x00000000,
0336     (0x9c00 << 16) | (0x9494 >> 2),
0337     0x00000000,
0338     (0x9c00 << 16) | (0xac0c >> 2),
0339     0x00000000,
0340     (0x9c00 << 16) | (0xac10 >> 2),
0341     0x00000000,
0342     (0x9c00 << 16) | (0xac14 >> 2),
0343     0x00000000,
0344     (0x9c00 << 16) | (0xae00 >> 2),
0345     0x00000000,
0346     (0x9c00 << 16) | (0xac08 >> 2),
0347     0x00000000,
0348     (0x9c00 << 16) | (0x88d4 >> 2),
0349     0x00000000,
0350     (0x9c00 << 16) | (0x88c8 >> 2),
0351     0x00000000,
0352     (0x9c00 << 16) | (0x88cc >> 2),
0353     0x00000000,
0354     (0x9c00 << 16) | (0x89b0 >> 2),
0355     0x00000000,
0356     (0x9c00 << 16) | (0x8b10 >> 2),
0357     0x00000000,
0358     (0x9c00 << 16) | (0x8a14 >> 2),
0359     0x00000000,
0360     (0x9c00 << 16) | (0x9830 >> 2),
0361     0x00000000,
0362     (0x9c00 << 16) | (0x9834 >> 2),
0363     0x00000000,
0364     (0x9c00 << 16) | (0x9838 >> 2),
0365     0x00000000,
0366     (0x9c00 << 16) | (0x9a10 >> 2),
0367     0x00000000,
0368     (0x8000 << 16) | (0x9870 >> 2),
0369     0x00000000,
0370     (0x8000 << 16) | (0x9874 >> 2),
0371     0x00000000,
0372     (0x8001 << 16) | (0x9870 >> 2),
0373     0x00000000,
0374     (0x8001 << 16) | (0x9874 >> 2),
0375     0x00000000,
0376     (0x8040 << 16) | (0x9870 >> 2),
0377     0x00000000,
0378     (0x8040 << 16) | (0x9874 >> 2),
0379     0x00000000,
0380     (0x8041 << 16) | (0x9870 >> 2),
0381     0x00000000,
0382     (0x8041 << 16) | (0x9874 >> 2),
0383     0x00000000,
0384     0x00000000
0385 };
0386 
0387 static const u32 tahiti_golden_rlc_registers[] =
0388 {
0389     0xc424, 0xffffffff, 0x00601005,
0390     0xc47c, 0xffffffff, 0x10104040,
0391     0xc488, 0xffffffff, 0x0100000a,
0392     0xc314, 0xffffffff, 0x00000800,
0393     0xc30c, 0xffffffff, 0x800000f4,
0394     0xf4a8, 0xffffffff, 0x00000000
0395 };
0396 
0397 static const u32 tahiti_golden_registers[] =
0398 {
0399     0x9a10, 0x00010000, 0x00018208,
0400     0x9830, 0xffffffff, 0x00000000,
0401     0x9834, 0xf00fffff, 0x00000400,
0402     0x9838, 0x0002021c, 0x00020200,
0403     0xc78, 0x00000080, 0x00000000,
0404     0xd030, 0x000300c0, 0x00800040,
0405     0xd830, 0x000300c0, 0x00800040,
0406     0x5bb0, 0x000000f0, 0x00000070,
0407     0x5bc0, 0x00200000, 0x50100000,
0408     0x7030, 0x31000311, 0x00000011,
0409     0x277c, 0x00000003, 0x000007ff,
0410     0x240c, 0x000007ff, 0x00000000,
0411     0x8a14, 0xf000001f, 0x00000007,
0412     0x8b24, 0xffffffff, 0x00ffffff,
0413     0x8b10, 0x0000ff0f, 0x00000000,
0414     0x28a4c, 0x07ffffff, 0x4e000000,
0415     0x28350, 0x3f3f3fff, 0x2a00126a,
0416     0x30, 0x000000ff, 0x0040,
0417     0x34, 0x00000040, 0x00004040,
0418     0x9100, 0x07ffffff, 0x03000000,
0419     0x8e88, 0x01ff1f3f, 0x00000000,
0420     0x8e84, 0x01ff1f3f, 0x00000000,
0421     0x9060, 0x0000007f, 0x00000020,
0422     0x9508, 0x00010000, 0x00010000,
0423     0xac14, 0x00000200, 0x000002fb,
0424     0xac10, 0xffffffff, 0x0000543b,
0425     0xac0c, 0xffffffff, 0xa9210876,
0426     0x88d0, 0xffffffff, 0x000fff40,
0427     0x88d4, 0x0000001f, 0x00000010,
0428     0x1410, 0x20000000, 0x20fffed8,
0429     0x15c0, 0x000c0fc0, 0x000c0400
0430 };
0431 
0432 static const u32 tahiti_golden_registers2[] =
0433 {
0434     0xc64, 0x00000001, 0x00000001
0435 };
0436 
0437 static const u32 pitcairn_golden_rlc_registers[] =
0438 {
0439     0xc424, 0xffffffff, 0x00601004,
0440     0xc47c, 0xffffffff, 0x10102020,
0441     0xc488, 0xffffffff, 0x01000020,
0442     0xc314, 0xffffffff, 0x00000800,
0443     0xc30c, 0xffffffff, 0x800000a4
0444 };
0445 
0446 static const u32 pitcairn_golden_registers[] =
0447 {
0448     0x9a10, 0x00010000, 0x00018208,
0449     0x9830, 0xffffffff, 0x00000000,
0450     0x9834, 0xf00fffff, 0x00000400,
0451     0x9838, 0x0002021c, 0x00020200,
0452     0xc78, 0x00000080, 0x00000000,
0453     0xd030, 0x000300c0, 0x00800040,
0454     0xd830, 0x000300c0, 0x00800040,
0455     0x5bb0, 0x000000f0, 0x00000070,
0456     0x5bc0, 0x00200000, 0x50100000,
0457     0x7030, 0x31000311, 0x00000011,
0458     0x2ae4, 0x00073ffe, 0x000022a2,
0459     0x240c, 0x000007ff, 0x00000000,
0460     0x8a14, 0xf000001f, 0x00000007,
0461     0x8b24, 0xffffffff, 0x00ffffff,
0462     0x8b10, 0x0000ff0f, 0x00000000,
0463     0x28a4c, 0x07ffffff, 0x4e000000,
0464     0x28350, 0x3f3f3fff, 0x2a00126a,
0465     0x30, 0x000000ff, 0x0040,
0466     0x34, 0x00000040, 0x00004040,
0467     0x9100, 0x07ffffff, 0x03000000,
0468     0x9060, 0x0000007f, 0x00000020,
0469     0x9508, 0x00010000, 0x00010000,
0470     0xac14, 0x000003ff, 0x000000f7,
0471     0xac10, 0xffffffff, 0x00000000,
0472     0xac0c, 0xffffffff, 0x32761054,
0473     0x88d4, 0x0000001f, 0x00000010,
0474     0x15c0, 0x000c0fc0, 0x000c0400
0475 };
0476 
0477 static const u32 verde_golden_rlc_registers[] =
0478 {
0479     0xc424, 0xffffffff, 0x033f1005,
0480     0xc47c, 0xffffffff, 0x10808020,
0481     0xc488, 0xffffffff, 0x00800008,
0482     0xc314, 0xffffffff, 0x00001000,
0483     0xc30c, 0xffffffff, 0x80010014
0484 };
0485 
0486 static const u32 verde_golden_registers[] =
0487 {
0488     0x9a10, 0x00010000, 0x00018208,
0489     0x9830, 0xffffffff, 0x00000000,
0490     0x9834, 0xf00fffff, 0x00000400,
0491     0x9838, 0x0002021c, 0x00020200,
0492     0xc78, 0x00000080, 0x00000000,
0493     0xd030, 0x000300c0, 0x00800040,
0494     0xd030, 0x000300c0, 0x00800040,
0495     0xd830, 0x000300c0, 0x00800040,
0496     0xd830, 0x000300c0, 0x00800040,
0497     0x5bb0, 0x000000f0, 0x00000070,
0498     0x5bc0, 0x00200000, 0x50100000,
0499     0x7030, 0x31000311, 0x00000011,
0500     0x2ae4, 0x00073ffe, 0x000022a2,
0501     0x2ae4, 0x00073ffe, 0x000022a2,
0502     0x2ae4, 0x00073ffe, 0x000022a2,
0503     0x240c, 0x000007ff, 0x00000000,
0504     0x240c, 0x000007ff, 0x00000000,
0505     0x240c, 0x000007ff, 0x00000000,
0506     0x8a14, 0xf000001f, 0x00000007,
0507     0x8a14, 0xf000001f, 0x00000007,
0508     0x8a14, 0xf000001f, 0x00000007,
0509     0x8b24, 0xffffffff, 0x00ffffff,
0510     0x8b10, 0x0000ff0f, 0x00000000,
0511     0x28a4c, 0x07ffffff, 0x4e000000,
0512     0x28350, 0x3f3f3fff, 0x0000124a,
0513     0x28350, 0x3f3f3fff, 0x0000124a,
0514     0x28350, 0x3f3f3fff, 0x0000124a,
0515     0x30, 0x000000ff, 0x0040,
0516     0x34, 0x00000040, 0x00004040,
0517     0x9100, 0x07ffffff, 0x03000000,
0518     0x9100, 0x07ffffff, 0x03000000,
0519     0x8e88, 0x01ff1f3f, 0x00000000,
0520     0x8e88, 0x01ff1f3f, 0x00000000,
0521     0x8e88, 0x01ff1f3f, 0x00000000,
0522     0x8e84, 0x01ff1f3f, 0x00000000,
0523     0x8e84, 0x01ff1f3f, 0x00000000,
0524     0x8e84, 0x01ff1f3f, 0x00000000,
0525     0x9060, 0x0000007f, 0x00000020,
0526     0x9508, 0x00010000, 0x00010000,
0527     0xac14, 0x000003ff, 0x00000003,
0528     0xac14, 0x000003ff, 0x00000003,
0529     0xac14, 0x000003ff, 0x00000003,
0530     0xac10, 0xffffffff, 0x00000000,
0531     0xac10, 0xffffffff, 0x00000000,
0532     0xac10, 0xffffffff, 0x00000000,
0533     0xac0c, 0xffffffff, 0x00001032,
0534     0xac0c, 0xffffffff, 0x00001032,
0535     0xac0c, 0xffffffff, 0x00001032,
0536     0x88d4, 0x0000001f, 0x00000010,
0537     0x88d4, 0x0000001f, 0x00000010,
0538     0x88d4, 0x0000001f, 0x00000010,
0539     0x15c0, 0x000c0fc0, 0x000c0400
0540 };
0541 
0542 static const u32 oland_golden_rlc_registers[] =
0543 {
0544     0xc424, 0xffffffff, 0x00601005,
0545     0xc47c, 0xffffffff, 0x10104040,
0546     0xc488, 0xffffffff, 0x0100000a,
0547     0xc314, 0xffffffff, 0x00000800,
0548     0xc30c, 0xffffffff, 0x800000f4
0549 };
0550 
0551 static const u32 oland_golden_registers[] =
0552 {
0553     0x9a10, 0x00010000, 0x00018208,
0554     0x9830, 0xffffffff, 0x00000000,
0555     0x9834, 0xf00fffff, 0x00000400,
0556     0x9838, 0x0002021c, 0x00020200,
0557     0xc78, 0x00000080, 0x00000000,
0558     0xd030, 0x000300c0, 0x00800040,
0559     0xd830, 0x000300c0, 0x00800040,
0560     0x5bb0, 0x000000f0, 0x00000070,
0561     0x5bc0, 0x00200000, 0x50100000,
0562     0x7030, 0x31000311, 0x00000011,
0563     0x2ae4, 0x00073ffe, 0x000022a2,
0564     0x240c, 0x000007ff, 0x00000000,
0565     0x8a14, 0xf000001f, 0x00000007,
0566     0x8b24, 0xffffffff, 0x00ffffff,
0567     0x8b10, 0x0000ff0f, 0x00000000,
0568     0x28a4c, 0x07ffffff, 0x4e000000,
0569     0x28350, 0x3f3f3fff, 0x00000082,
0570     0x30, 0x000000ff, 0x0040,
0571     0x34, 0x00000040, 0x00004040,
0572     0x9100, 0x07ffffff, 0x03000000,
0573     0x9060, 0x0000007f, 0x00000020,
0574     0x9508, 0x00010000, 0x00010000,
0575     0xac14, 0x000003ff, 0x000000f3,
0576     0xac10, 0xffffffff, 0x00000000,
0577     0xac0c, 0xffffffff, 0x00003210,
0578     0x88d4, 0x0000001f, 0x00000010,
0579     0x15c0, 0x000c0fc0, 0x000c0400
0580 };
0581 
0582 static const u32 hainan_golden_registers[] =
0583 {
0584     0x9a10, 0x00010000, 0x00018208,
0585     0x9830, 0xffffffff, 0x00000000,
0586     0x9834, 0xf00fffff, 0x00000400,
0587     0x9838, 0x0002021c, 0x00020200,
0588     0xd0c0, 0xff000fff, 0x00000100,
0589     0xd030, 0x000300c0, 0x00800040,
0590     0xd8c0, 0xff000fff, 0x00000100,
0591     0xd830, 0x000300c0, 0x00800040,
0592     0x2ae4, 0x00073ffe, 0x000022a2,
0593     0x240c, 0x000007ff, 0x00000000,
0594     0x8a14, 0xf000001f, 0x00000007,
0595     0x8b24, 0xffffffff, 0x00ffffff,
0596     0x8b10, 0x0000ff0f, 0x00000000,
0597     0x28a4c, 0x07ffffff, 0x4e000000,
0598     0x28350, 0x3f3f3fff, 0x00000000,
0599     0x30, 0x000000ff, 0x0040,
0600     0x34, 0x00000040, 0x00004040,
0601     0x9100, 0x03e00000, 0x03600000,
0602     0x9060, 0x0000007f, 0x00000020,
0603     0x9508, 0x00010000, 0x00010000,
0604     0xac14, 0x000003ff, 0x000000f1,
0605     0xac10, 0xffffffff, 0x00000000,
0606     0xac0c, 0xffffffff, 0x00003210,
0607     0x88d4, 0x0000001f, 0x00000010,
0608     0x15c0, 0x000c0fc0, 0x000c0400
0609 };
0610 
0611 static const u32 hainan_golden_registers2[] =
0612 {
0613     0x98f8, 0xffffffff, 0x02010001
0614 };
0615 
0616 static const u32 tahiti_mgcg_cgcg_init[] =
0617 {
0618     0xc400, 0xffffffff, 0xfffffffc,
0619     0x802c, 0xffffffff, 0xe0000000,
0620     0x9a60, 0xffffffff, 0x00000100,
0621     0x92a4, 0xffffffff, 0x00000100,
0622     0xc164, 0xffffffff, 0x00000100,
0623     0x9774, 0xffffffff, 0x00000100,
0624     0x8984, 0xffffffff, 0x06000100,
0625     0x8a18, 0xffffffff, 0x00000100,
0626     0x92a0, 0xffffffff, 0x00000100,
0627     0xc380, 0xffffffff, 0x00000100,
0628     0x8b28, 0xffffffff, 0x00000100,
0629     0x9144, 0xffffffff, 0x00000100,
0630     0x8d88, 0xffffffff, 0x00000100,
0631     0x8d8c, 0xffffffff, 0x00000100,
0632     0x9030, 0xffffffff, 0x00000100,
0633     0x9034, 0xffffffff, 0x00000100,
0634     0x9038, 0xffffffff, 0x00000100,
0635     0x903c, 0xffffffff, 0x00000100,
0636     0xad80, 0xffffffff, 0x00000100,
0637     0xac54, 0xffffffff, 0x00000100,
0638     0x897c, 0xffffffff, 0x06000100,
0639     0x9868, 0xffffffff, 0x00000100,
0640     0x9510, 0xffffffff, 0x00000100,
0641     0xaf04, 0xffffffff, 0x00000100,
0642     0xae04, 0xffffffff, 0x00000100,
0643     0x949c, 0xffffffff, 0x00000100,
0644     0x802c, 0xffffffff, 0xe0000000,
0645     0x9160, 0xffffffff, 0x00010000,
0646     0x9164, 0xffffffff, 0x00030002,
0647     0x9168, 0xffffffff, 0x00040007,
0648     0x916c, 0xffffffff, 0x00060005,
0649     0x9170, 0xffffffff, 0x00090008,
0650     0x9174, 0xffffffff, 0x00020001,
0651     0x9178, 0xffffffff, 0x00040003,
0652     0x917c, 0xffffffff, 0x00000007,
0653     0x9180, 0xffffffff, 0x00060005,
0654     0x9184, 0xffffffff, 0x00090008,
0655     0x9188, 0xffffffff, 0x00030002,
0656     0x918c, 0xffffffff, 0x00050004,
0657     0x9190, 0xffffffff, 0x00000008,
0658     0x9194, 0xffffffff, 0x00070006,
0659     0x9198, 0xffffffff, 0x000a0009,
0660     0x919c, 0xffffffff, 0x00040003,
0661     0x91a0, 0xffffffff, 0x00060005,
0662     0x91a4, 0xffffffff, 0x00000009,
0663     0x91a8, 0xffffffff, 0x00080007,
0664     0x91ac, 0xffffffff, 0x000b000a,
0665     0x91b0, 0xffffffff, 0x00050004,
0666     0x91b4, 0xffffffff, 0x00070006,
0667     0x91b8, 0xffffffff, 0x0008000b,
0668     0x91bc, 0xffffffff, 0x000a0009,
0669     0x91c0, 0xffffffff, 0x000d000c,
0670     0x91c4, 0xffffffff, 0x00060005,
0671     0x91c8, 0xffffffff, 0x00080007,
0672     0x91cc, 0xffffffff, 0x0000000b,
0673     0x91d0, 0xffffffff, 0x000a0009,
0674     0x91d4, 0xffffffff, 0x000d000c,
0675     0x91d8, 0xffffffff, 0x00070006,
0676     0x91dc, 0xffffffff, 0x00090008,
0677     0x91e0, 0xffffffff, 0x0000000c,
0678     0x91e4, 0xffffffff, 0x000b000a,
0679     0x91e8, 0xffffffff, 0x000e000d,
0680     0x91ec, 0xffffffff, 0x00080007,
0681     0x91f0, 0xffffffff, 0x000a0009,
0682     0x91f4, 0xffffffff, 0x0000000d,
0683     0x91f8, 0xffffffff, 0x000c000b,
0684     0x91fc, 0xffffffff, 0x000f000e,
0685     0x9200, 0xffffffff, 0x00090008,
0686     0x9204, 0xffffffff, 0x000b000a,
0687     0x9208, 0xffffffff, 0x000c000f,
0688     0x920c, 0xffffffff, 0x000e000d,
0689     0x9210, 0xffffffff, 0x00110010,
0690     0x9214, 0xffffffff, 0x000a0009,
0691     0x9218, 0xffffffff, 0x000c000b,
0692     0x921c, 0xffffffff, 0x0000000f,
0693     0x9220, 0xffffffff, 0x000e000d,
0694     0x9224, 0xffffffff, 0x00110010,
0695     0x9228, 0xffffffff, 0x000b000a,
0696     0x922c, 0xffffffff, 0x000d000c,
0697     0x9230, 0xffffffff, 0x00000010,
0698     0x9234, 0xffffffff, 0x000f000e,
0699     0x9238, 0xffffffff, 0x00120011,
0700     0x923c, 0xffffffff, 0x000c000b,
0701     0x9240, 0xffffffff, 0x000e000d,
0702     0x9244, 0xffffffff, 0x00000011,
0703     0x9248, 0xffffffff, 0x0010000f,
0704     0x924c, 0xffffffff, 0x00130012,
0705     0x9250, 0xffffffff, 0x000d000c,
0706     0x9254, 0xffffffff, 0x000f000e,
0707     0x9258, 0xffffffff, 0x00100013,
0708     0x925c, 0xffffffff, 0x00120011,
0709     0x9260, 0xffffffff, 0x00150014,
0710     0x9264, 0xffffffff, 0x000e000d,
0711     0x9268, 0xffffffff, 0x0010000f,
0712     0x926c, 0xffffffff, 0x00000013,
0713     0x9270, 0xffffffff, 0x00120011,
0714     0x9274, 0xffffffff, 0x00150014,
0715     0x9278, 0xffffffff, 0x000f000e,
0716     0x927c, 0xffffffff, 0x00110010,
0717     0x9280, 0xffffffff, 0x00000014,
0718     0x9284, 0xffffffff, 0x00130012,
0719     0x9288, 0xffffffff, 0x00160015,
0720     0x928c, 0xffffffff, 0x0010000f,
0721     0x9290, 0xffffffff, 0x00120011,
0722     0x9294, 0xffffffff, 0x00000015,
0723     0x9298, 0xffffffff, 0x00140013,
0724     0x929c, 0xffffffff, 0x00170016,
0725     0x9150, 0xffffffff, 0x96940200,
0726     0x8708, 0xffffffff, 0x00900100,
0727     0xc478, 0xffffffff, 0x00000080,
0728     0xc404, 0xffffffff, 0x0020003f,
0729     0x30, 0xffffffff, 0x0000001c,
0730     0x34, 0x000f0000, 0x000f0000,
0731     0x160c, 0xffffffff, 0x00000100,
0732     0x1024, 0xffffffff, 0x00000100,
0733     0x102c, 0x00000101, 0x00000000,
0734     0x20a8, 0xffffffff, 0x00000104,
0735     0x264c, 0x000c0000, 0x000c0000,
0736     0x2648, 0x000c0000, 0x000c0000,
0737     0x55e4, 0xff000fff, 0x00000100,
0738     0x55e8, 0x00000001, 0x00000001,
0739     0x2f50, 0x00000001, 0x00000001,
0740     0x30cc, 0xc0000fff, 0x00000104,
0741     0xc1e4, 0x00000001, 0x00000001,
0742     0xd0c0, 0xfffffff0, 0x00000100,
0743     0xd8c0, 0xfffffff0, 0x00000100
0744 };
0745 
0746 static const u32 pitcairn_mgcg_cgcg_init[] =
0747 {
0748     0xc400, 0xffffffff, 0xfffffffc,
0749     0x802c, 0xffffffff, 0xe0000000,
0750     0x9a60, 0xffffffff, 0x00000100,
0751     0x92a4, 0xffffffff, 0x00000100,
0752     0xc164, 0xffffffff, 0x00000100,
0753     0x9774, 0xffffffff, 0x00000100,
0754     0x8984, 0xffffffff, 0x06000100,
0755     0x8a18, 0xffffffff, 0x00000100,
0756     0x92a0, 0xffffffff, 0x00000100,
0757     0xc380, 0xffffffff, 0x00000100,
0758     0x8b28, 0xffffffff, 0x00000100,
0759     0x9144, 0xffffffff, 0x00000100,
0760     0x8d88, 0xffffffff, 0x00000100,
0761     0x8d8c, 0xffffffff, 0x00000100,
0762     0x9030, 0xffffffff, 0x00000100,
0763     0x9034, 0xffffffff, 0x00000100,
0764     0x9038, 0xffffffff, 0x00000100,
0765     0x903c, 0xffffffff, 0x00000100,
0766     0xad80, 0xffffffff, 0x00000100,
0767     0xac54, 0xffffffff, 0x00000100,
0768     0x897c, 0xffffffff, 0x06000100,
0769     0x9868, 0xffffffff, 0x00000100,
0770     0x9510, 0xffffffff, 0x00000100,
0771     0xaf04, 0xffffffff, 0x00000100,
0772     0xae04, 0xffffffff, 0x00000100,
0773     0x949c, 0xffffffff, 0x00000100,
0774     0x802c, 0xffffffff, 0xe0000000,
0775     0x9160, 0xffffffff, 0x00010000,
0776     0x9164, 0xffffffff, 0x00030002,
0777     0x9168, 0xffffffff, 0x00040007,
0778     0x916c, 0xffffffff, 0x00060005,
0779     0x9170, 0xffffffff, 0x00090008,
0780     0x9174, 0xffffffff, 0x00020001,
0781     0x9178, 0xffffffff, 0x00040003,
0782     0x917c, 0xffffffff, 0x00000007,
0783     0x9180, 0xffffffff, 0x00060005,
0784     0x9184, 0xffffffff, 0x00090008,
0785     0x9188, 0xffffffff, 0x00030002,
0786     0x918c, 0xffffffff, 0x00050004,
0787     0x9190, 0xffffffff, 0x00000008,
0788     0x9194, 0xffffffff, 0x00070006,
0789     0x9198, 0xffffffff, 0x000a0009,
0790     0x919c, 0xffffffff, 0x00040003,
0791     0x91a0, 0xffffffff, 0x00060005,
0792     0x91a4, 0xffffffff, 0x00000009,
0793     0x91a8, 0xffffffff, 0x00080007,
0794     0x91ac, 0xffffffff, 0x000b000a,
0795     0x91b0, 0xffffffff, 0x00050004,
0796     0x91b4, 0xffffffff, 0x00070006,
0797     0x91b8, 0xffffffff, 0x0008000b,
0798     0x91bc, 0xffffffff, 0x000a0009,
0799     0x91c0, 0xffffffff, 0x000d000c,
0800     0x9200, 0xffffffff, 0x00090008,
0801     0x9204, 0xffffffff, 0x000b000a,
0802     0x9208, 0xffffffff, 0x000c000f,
0803     0x920c, 0xffffffff, 0x000e000d,
0804     0x9210, 0xffffffff, 0x00110010,
0805     0x9214, 0xffffffff, 0x000a0009,
0806     0x9218, 0xffffffff, 0x000c000b,
0807     0x921c, 0xffffffff, 0x0000000f,
0808     0x9220, 0xffffffff, 0x000e000d,
0809     0x9224, 0xffffffff, 0x00110010,
0810     0x9228, 0xffffffff, 0x000b000a,
0811     0x922c, 0xffffffff, 0x000d000c,
0812     0x9230, 0xffffffff, 0x00000010,
0813     0x9234, 0xffffffff, 0x000f000e,
0814     0x9238, 0xffffffff, 0x00120011,
0815     0x923c, 0xffffffff, 0x000c000b,
0816     0x9240, 0xffffffff, 0x000e000d,
0817     0x9244, 0xffffffff, 0x00000011,
0818     0x9248, 0xffffffff, 0x0010000f,
0819     0x924c, 0xffffffff, 0x00130012,
0820     0x9250, 0xffffffff, 0x000d000c,
0821     0x9254, 0xffffffff, 0x000f000e,
0822     0x9258, 0xffffffff, 0x00100013,
0823     0x925c, 0xffffffff, 0x00120011,
0824     0x9260, 0xffffffff, 0x00150014,
0825     0x9150, 0xffffffff, 0x96940200,
0826     0x8708, 0xffffffff, 0x00900100,
0827     0xc478, 0xffffffff, 0x00000080,
0828     0xc404, 0xffffffff, 0x0020003f,
0829     0x30, 0xffffffff, 0x0000001c,
0830     0x34, 0x000f0000, 0x000f0000,
0831     0x160c, 0xffffffff, 0x00000100,
0832     0x1024, 0xffffffff, 0x00000100,
0833     0x102c, 0x00000101, 0x00000000,
0834     0x20a8, 0xffffffff, 0x00000104,
0835     0x55e4, 0xff000fff, 0x00000100,
0836     0x55e8, 0x00000001, 0x00000001,
0837     0x2f50, 0x00000001, 0x00000001,
0838     0x30cc, 0xc0000fff, 0x00000104,
0839     0xc1e4, 0x00000001, 0x00000001,
0840     0xd0c0, 0xfffffff0, 0x00000100,
0841     0xd8c0, 0xfffffff0, 0x00000100
0842 };
0843 
0844 static const u32 verde_mgcg_cgcg_init[] =
0845 {
0846     0xc400, 0xffffffff, 0xfffffffc,
0847     0x802c, 0xffffffff, 0xe0000000,
0848     0x9a60, 0xffffffff, 0x00000100,
0849     0x92a4, 0xffffffff, 0x00000100,
0850     0xc164, 0xffffffff, 0x00000100,
0851     0x9774, 0xffffffff, 0x00000100,
0852     0x8984, 0xffffffff, 0x06000100,
0853     0x8a18, 0xffffffff, 0x00000100,
0854     0x92a0, 0xffffffff, 0x00000100,
0855     0xc380, 0xffffffff, 0x00000100,
0856     0x8b28, 0xffffffff, 0x00000100,
0857     0x9144, 0xffffffff, 0x00000100,
0858     0x8d88, 0xffffffff, 0x00000100,
0859     0x8d8c, 0xffffffff, 0x00000100,
0860     0x9030, 0xffffffff, 0x00000100,
0861     0x9034, 0xffffffff, 0x00000100,
0862     0x9038, 0xffffffff, 0x00000100,
0863     0x903c, 0xffffffff, 0x00000100,
0864     0xad80, 0xffffffff, 0x00000100,
0865     0xac54, 0xffffffff, 0x00000100,
0866     0x897c, 0xffffffff, 0x06000100,
0867     0x9868, 0xffffffff, 0x00000100,
0868     0x9510, 0xffffffff, 0x00000100,
0869     0xaf04, 0xffffffff, 0x00000100,
0870     0xae04, 0xffffffff, 0x00000100,
0871     0x949c, 0xffffffff, 0x00000100,
0872     0x802c, 0xffffffff, 0xe0000000,
0873     0x9160, 0xffffffff, 0x00010000,
0874     0x9164, 0xffffffff, 0x00030002,
0875     0x9168, 0xffffffff, 0x00040007,
0876     0x916c, 0xffffffff, 0x00060005,
0877     0x9170, 0xffffffff, 0x00090008,
0878     0x9174, 0xffffffff, 0x00020001,
0879     0x9178, 0xffffffff, 0x00040003,
0880     0x917c, 0xffffffff, 0x00000007,
0881     0x9180, 0xffffffff, 0x00060005,
0882     0x9184, 0xffffffff, 0x00090008,
0883     0x9188, 0xffffffff, 0x00030002,
0884     0x918c, 0xffffffff, 0x00050004,
0885     0x9190, 0xffffffff, 0x00000008,
0886     0x9194, 0xffffffff, 0x00070006,
0887     0x9198, 0xffffffff, 0x000a0009,
0888     0x919c, 0xffffffff, 0x00040003,
0889     0x91a0, 0xffffffff, 0x00060005,
0890     0x91a4, 0xffffffff, 0x00000009,
0891     0x91a8, 0xffffffff, 0x00080007,
0892     0x91ac, 0xffffffff, 0x000b000a,
0893     0x91b0, 0xffffffff, 0x00050004,
0894     0x91b4, 0xffffffff, 0x00070006,
0895     0x91b8, 0xffffffff, 0x0008000b,
0896     0x91bc, 0xffffffff, 0x000a0009,
0897     0x91c0, 0xffffffff, 0x000d000c,
0898     0x9200, 0xffffffff, 0x00090008,
0899     0x9204, 0xffffffff, 0x000b000a,
0900     0x9208, 0xffffffff, 0x000c000f,
0901     0x920c, 0xffffffff, 0x000e000d,
0902     0x9210, 0xffffffff, 0x00110010,
0903     0x9214, 0xffffffff, 0x000a0009,
0904     0x9218, 0xffffffff, 0x000c000b,
0905     0x921c, 0xffffffff, 0x0000000f,
0906     0x9220, 0xffffffff, 0x000e000d,
0907     0x9224, 0xffffffff, 0x00110010,
0908     0x9228, 0xffffffff, 0x000b000a,
0909     0x922c, 0xffffffff, 0x000d000c,
0910     0x9230, 0xffffffff, 0x00000010,
0911     0x9234, 0xffffffff, 0x000f000e,
0912     0x9238, 0xffffffff, 0x00120011,
0913     0x923c, 0xffffffff, 0x000c000b,
0914     0x9240, 0xffffffff, 0x000e000d,
0915     0x9244, 0xffffffff, 0x00000011,
0916     0x9248, 0xffffffff, 0x0010000f,
0917     0x924c, 0xffffffff, 0x00130012,
0918     0x9250, 0xffffffff, 0x000d000c,
0919     0x9254, 0xffffffff, 0x000f000e,
0920     0x9258, 0xffffffff, 0x00100013,
0921     0x925c, 0xffffffff, 0x00120011,
0922     0x9260, 0xffffffff, 0x00150014,
0923     0x9150, 0xffffffff, 0x96940200,
0924     0x8708, 0xffffffff, 0x00900100,
0925     0xc478, 0xffffffff, 0x00000080,
0926     0xc404, 0xffffffff, 0x0020003f,
0927     0x30, 0xffffffff, 0x0000001c,
0928     0x34, 0x000f0000, 0x000f0000,
0929     0x160c, 0xffffffff, 0x00000100,
0930     0x1024, 0xffffffff, 0x00000100,
0931     0x102c, 0x00000101, 0x00000000,
0932     0x20a8, 0xffffffff, 0x00000104,
0933     0x264c, 0x000c0000, 0x000c0000,
0934     0x2648, 0x000c0000, 0x000c0000,
0935     0x55e4, 0xff000fff, 0x00000100,
0936     0x55e8, 0x00000001, 0x00000001,
0937     0x2f50, 0x00000001, 0x00000001,
0938     0x30cc, 0xc0000fff, 0x00000104,
0939     0xc1e4, 0x00000001, 0x00000001,
0940     0xd0c0, 0xfffffff0, 0x00000100,
0941     0xd8c0, 0xfffffff0, 0x00000100
0942 };
0943 
0944 static const u32 oland_mgcg_cgcg_init[] =
0945 {
0946     0xc400, 0xffffffff, 0xfffffffc,
0947     0x802c, 0xffffffff, 0xe0000000,
0948     0x9a60, 0xffffffff, 0x00000100,
0949     0x92a4, 0xffffffff, 0x00000100,
0950     0xc164, 0xffffffff, 0x00000100,
0951     0x9774, 0xffffffff, 0x00000100,
0952     0x8984, 0xffffffff, 0x06000100,
0953     0x8a18, 0xffffffff, 0x00000100,
0954     0x92a0, 0xffffffff, 0x00000100,
0955     0xc380, 0xffffffff, 0x00000100,
0956     0x8b28, 0xffffffff, 0x00000100,
0957     0x9144, 0xffffffff, 0x00000100,
0958     0x8d88, 0xffffffff, 0x00000100,
0959     0x8d8c, 0xffffffff, 0x00000100,
0960     0x9030, 0xffffffff, 0x00000100,
0961     0x9034, 0xffffffff, 0x00000100,
0962     0x9038, 0xffffffff, 0x00000100,
0963     0x903c, 0xffffffff, 0x00000100,
0964     0xad80, 0xffffffff, 0x00000100,
0965     0xac54, 0xffffffff, 0x00000100,
0966     0x897c, 0xffffffff, 0x06000100,
0967     0x9868, 0xffffffff, 0x00000100,
0968     0x9510, 0xffffffff, 0x00000100,
0969     0xaf04, 0xffffffff, 0x00000100,
0970     0xae04, 0xffffffff, 0x00000100,
0971     0x949c, 0xffffffff, 0x00000100,
0972     0x802c, 0xffffffff, 0xe0000000,
0973     0x9160, 0xffffffff, 0x00010000,
0974     0x9164, 0xffffffff, 0x00030002,
0975     0x9168, 0xffffffff, 0x00040007,
0976     0x916c, 0xffffffff, 0x00060005,
0977     0x9170, 0xffffffff, 0x00090008,
0978     0x9174, 0xffffffff, 0x00020001,
0979     0x9178, 0xffffffff, 0x00040003,
0980     0x917c, 0xffffffff, 0x00000007,
0981     0x9180, 0xffffffff, 0x00060005,
0982     0x9184, 0xffffffff, 0x00090008,
0983     0x9188, 0xffffffff, 0x00030002,
0984     0x918c, 0xffffffff, 0x00050004,
0985     0x9190, 0xffffffff, 0x00000008,
0986     0x9194, 0xffffffff, 0x00070006,
0987     0x9198, 0xffffffff, 0x000a0009,
0988     0x919c, 0xffffffff, 0x00040003,
0989     0x91a0, 0xffffffff, 0x00060005,
0990     0x91a4, 0xffffffff, 0x00000009,
0991     0x91a8, 0xffffffff, 0x00080007,
0992     0x91ac, 0xffffffff, 0x000b000a,
0993     0x91b0, 0xffffffff, 0x00050004,
0994     0x91b4, 0xffffffff, 0x00070006,
0995     0x91b8, 0xffffffff, 0x0008000b,
0996     0x91bc, 0xffffffff, 0x000a0009,
0997     0x91c0, 0xffffffff, 0x000d000c,
0998     0x91c4, 0xffffffff, 0x00060005,
0999     0x91c8, 0xffffffff, 0x00080007,
1000     0x91cc, 0xffffffff, 0x0000000b,
1001     0x91d0, 0xffffffff, 0x000a0009,
1002     0x91d4, 0xffffffff, 0x000d000c,
1003     0x9150, 0xffffffff, 0x96940200,
1004     0x8708, 0xffffffff, 0x00900100,
1005     0xc478, 0xffffffff, 0x00000080,
1006     0xc404, 0xffffffff, 0x0020003f,
1007     0x30, 0xffffffff, 0x0000001c,
1008     0x34, 0x000f0000, 0x000f0000,
1009     0x160c, 0xffffffff, 0x00000100,
1010     0x1024, 0xffffffff, 0x00000100,
1011     0x102c, 0x00000101, 0x00000000,
1012     0x20a8, 0xffffffff, 0x00000104,
1013     0x264c, 0x000c0000, 0x000c0000,
1014     0x2648, 0x000c0000, 0x000c0000,
1015     0x55e4, 0xff000fff, 0x00000100,
1016     0x55e8, 0x00000001, 0x00000001,
1017     0x2f50, 0x00000001, 0x00000001,
1018     0x30cc, 0xc0000fff, 0x00000104,
1019     0xc1e4, 0x00000001, 0x00000001,
1020     0xd0c0, 0xfffffff0, 0x00000100,
1021     0xd8c0, 0xfffffff0, 0x00000100
1022 };
1023 
1024 static const u32 hainan_mgcg_cgcg_init[] =
1025 {
1026     0xc400, 0xffffffff, 0xfffffffc,
1027     0x802c, 0xffffffff, 0xe0000000,
1028     0x9a60, 0xffffffff, 0x00000100,
1029     0x92a4, 0xffffffff, 0x00000100,
1030     0xc164, 0xffffffff, 0x00000100,
1031     0x9774, 0xffffffff, 0x00000100,
1032     0x8984, 0xffffffff, 0x06000100,
1033     0x8a18, 0xffffffff, 0x00000100,
1034     0x92a0, 0xffffffff, 0x00000100,
1035     0xc380, 0xffffffff, 0x00000100,
1036     0x8b28, 0xffffffff, 0x00000100,
1037     0x9144, 0xffffffff, 0x00000100,
1038     0x8d88, 0xffffffff, 0x00000100,
1039     0x8d8c, 0xffffffff, 0x00000100,
1040     0x9030, 0xffffffff, 0x00000100,
1041     0x9034, 0xffffffff, 0x00000100,
1042     0x9038, 0xffffffff, 0x00000100,
1043     0x903c, 0xffffffff, 0x00000100,
1044     0xad80, 0xffffffff, 0x00000100,
1045     0xac54, 0xffffffff, 0x00000100,
1046     0x897c, 0xffffffff, 0x06000100,
1047     0x9868, 0xffffffff, 0x00000100,
1048     0x9510, 0xffffffff, 0x00000100,
1049     0xaf04, 0xffffffff, 0x00000100,
1050     0xae04, 0xffffffff, 0x00000100,
1051     0x949c, 0xffffffff, 0x00000100,
1052     0x802c, 0xffffffff, 0xe0000000,
1053     0x9160, 0xffffffff, 0x00010000,
1054     0x9164, 0xffffffff, 0x00030002,
1055     0x9168, 0xffffffff, 0x00040007,
1056     0x916c, 0xffffffff, 0x00060005,
1057     0x9170, 0xffffffff, 0x00090008,
1058     0x9174, 0xffffffff, 0x00020001,
1059     0x9178, 0xffffffff, 0x00040003,
1060     0x917c, 0xffffffff, 0x00000007,
1061     0x9180, 0xffffffff, 0x00060005,
1062     0x9184, 0xffffffff, 0x00090008,
1063     0x9188, 0xffffffff, 0x00030002,
1064     0x918c, 0xffffffff, 0x00050004,
1065     0x9190, 0xffffffff, 0x00000008,
1066     0x9194, 0xffffffff, 0x00070006,
1067     0x9198, 0xffffffff, 0x000a0009,
1068     0x919c, 0xffffffff, 0x00040003,
1069     0x91a0, 0xffffffff, 0x00060005,
1070     0x91a4, 0xffffffff, 0x00000009,
1071     0x91a8, 0xffffffff, 0x00080007,
1072     0x91ac, 0xffffffff, 0x000b000a,
1073     0x91b0, 0xffffffff, 0x00050004,
1074     0x91b4, 0xffffffff, 0x00070006,
1075     0x91b8, 0xffffffff, 0x0008000b,
1076     0x91bc, 0xffffffff, 0x000a0009,
1077     0x91c0, 0xffffffff, 0x000d000c,
1078     0x91c4, 0xffffffff, 0x00060005,
1079     0x91c8, 0xffffffff, 0x00080007,
1080     0x91cc, 0xffffffff, 0x0000000b,
1081     0x91d0, 0xffffffff, 0x000a0009,
1082     0x91d4, 0xffffffff, 0x000d000c,
1083     0x9150, 0xffffffff, 0x96940200,
1084     0x8708, 0xffffffff, 0x00900100,
1085     0xc478, 0xffffffff, 0x00000080,
1086     0xc404, 0xffffffff, 0x0020003f,
1087     0x30, 0xffffffff, 0x0000001c,
1088     0x34, 0x000f0000, 0x000f0000,
1089     0x160c, 0xffffffff, 0x00000100,
1090     0x1024, 0xffffffff, 0x00000100,
1091     0x20a8, 0xffffffff, 0x00000104,
1092     0x264c, 0x000c0000, 0x000c0000,
1093     0x2648, 0x000c0000, 0x000c0000,
1094     0x2f50, 0x00000001, 0x00000001,
1095     0x30cc, 0xc0000fff, 0x00000104,
1096     0xc1e4, 0x00000001, 0x00000001,
1097     0xd0c0, 0xfffffff0, 0x00000100,
1098     0xd8c0, 0xfffffff0, 0x00000100
1099 };
1100 
1101 static u32 verde_pg_init[] =
1102 {
1103     0x353c, 0xffffffff, 0x40000,
1104     0x3538, 0xffffffff, 0x200010ff,
1105     0x353c, 0xffffffff, 0x0,
1106     0x353c, 0xffffffff, 0x0,
1107     0x353c, 0xffffffff, 0x0,
1108     0x353c, 0xffffffff, 0x0,
1109     0x353c, 0xffffffff, 0x0,
1110     0x353c, 0xffffffff, 0x7007,
1111     0x3538, 0xffffffff, 0x300010ff,
1112     0x353c, 0xffffffff, 0x0,
1113     0x353c, 0xffffffff, 0x0,
1114     0x353c, 0xffffffff, 0x0,
1115     0x353c, 0xffffffff, 0x0,
1116     0x353c, 0xffffffff, 0x0,
1117     0x353c, 0xffffffff, 0x400000,
1118     0x3538, 0xffffffff, 0x100010ff,
1119     0x353c, 0xffffffff, 0x0,
1120     0x353c, 0xffffffff, 0x0,
1121     0x353c, 0xffffffff, 0x0,
1122     0x353c, 0xffffffff, 0x0,
1123     0x353c, 0xffffffff, 0x0,
1124     0x353c, 0xffffffff, 0x120200,
1125     0x3538, 0xffffffff, 0x500010ff,
1126     0x353c, 0xffffffff, 0x0,
1127     0x353c, 0xffffffff, 0x0,
1128     0x353c, 0xffffffff, 0x0,
1129     0x353c, 0xffffffff, 0x0,
1130     0x353c, 0xffffffff, 0x0,
1131     0x353c, 0xffffffff, 0x1e1e16,
1132     0x3538, 0xffffffff, 0x600010ff,
1133     0x353c, 0xffffffff, 0x0,
1134     0x353c, 0xffffffff, 0x0,
1135     0x353c, 0xffffffff, 0x0,
1136     0x353c, 0xffffffff, 0x0,
1137     0x353c, 0xffffffff, 0x0,
1138     0x353c, 0xffffffff, 0x171f1e,
1139     0x3538, 0xffffffff, 0x700010ff,
1140     0x353c, 0xffffffff, 0x0,
1141     0x353c, 0xffffffff, 0x0,
1142     0x353c, 0xffffffff, 0x0,
1143     0x353c, 0xffffffff, 0x0,
1144     0x353c, 0xffffffff, 0x0,
1145     0x353c, 0xffffffff, 0x0,
1146     0x3538, 0xffffffff, 0x9ff,
1147     0x3500, 0xffffffff, 0x0,
1148     0x3504, 0xffffffff, 0x10000800,
1149     0x3504, 0xffffffff, 0xf,
1150     0x3504, 0xffffffff, 0xf,
1151     0x3500, 0xffffffff, 0x4,
1152     0x3504, 0xffffffff, 0x1000051e,
1153     0x3504, 0xffffffff, 0xffff,
1154     0x3504, 0xffffffff, 0xffff,
1155     0x3500, 0xffffffff, 0x8,
1156     0x3504, 0xffffffff, 0x80500,
1157     0x3500, 0xffffffff, 0x12,
1158     0x3504, 0xffffffff, 0x9050c,
1159     0x3500, 0xffffffff, 0x1d,
1160     0x3504, 0xffffffff, 0xb052c,
1161     0x3500, 0xffffffff, 0x2a,
1162     0x3504, 0xffffffff, 0x1053e,
1163     0x3500, 0xffffffff, 0x2d,
1164     0x3504, 0xffffffff, 0x10546,
1165     0x3500, 0xffffffff, 0x30,
1166     0x3504, 0xffffffff, 0xa054e,
1167     0x3500, 0xffffffff, 0x3c,
1168     0x3504, 0xffffffff, 0x1055f,
1169     0x3500, 0xffffffff, 0x3f,
1170     0x3504, 0xffffffff, 0x10567,
1171     0x3500, 0xffffffff, 0x42,
1172     0x3504, 0xffffffff, 0x1056f,
1173     0x3500, 0xffffffff, 0x45,
1174     0x3504, 0xffffffff, 0x10572,
1175     0x3500, 0xffffffff, 0x48,
1176     0x3504, 0xffffffff, 0x20575,
1177     0x3500, 0xffffffff, 0x4c,
1178     0x3504, 0xffffffff, 0x190801,
1179     0x3500, 0xffffffff, 0x67,
1180     0x3504, 0xffffffff, 0x1082a,
1181     0x3500, 0xffffffff, 0x6a,
1182     0x3504, 0xffffffff, 0x1b082d,
1183     0x3500, 0xffffffff, 0x87,
1184     0x3504, 0xffffffff, 0x310851,
1185     0x3500, 0xffffffff, 0xba,
1186     0x3504, 0xffffffff, 0x891,
1187     0x3500, 0xffffffff, 0xbc,
1188     0x3504, 0xffffffff, 0x893,
1189     0x3500, 0xffffffff, 0xbe,
1190     0x3504, 0xffffffff, 0x20895,
1191     0x3500, 0xffffffff, 0xc2,
1192     0x3504, 0xffffffff, 0x20899,
1193     0x3500, 0xffffffff, 0xc6,
1194     0x3504, 0xffffffff, 0x2089d,
1195     0x3500, 0xffffffff, 0xca,
1196     0x3504, 0xffffffff, 0x8a1,
1197     0x3500, 0xffffffff, 0xcc,
1198     0x3504, 0xffffffff, 0x8a3,
1199     0x3500, 0xffffffff, 0xce,
1200     0x3504, 0xffffffff, 0x308a5,
1201     0x3500, 0xffffffff, 0xd3,
1202     0x3504, 0xffffffff, 0x6d08cd,
1203     0x3500, 0xffffffff, 0x142,
1204     0x3504, 0xffffffff, 0x2000095a,
1205     0x3504, 0xffffffff, 0x1,
1206     0x3500, 0xffffffff, 0x144,
1207     0x3504, 0xffffffff, 0x301f095b,
1208     0x3500, 0xffffffff, 0x165,
1209     0x3504, 0xffffffff, 0xc094d,
1210     0x3500, 0xffffffff, 0x173,
1211     0x3504, 0xffffffff, 0xf096d,
1212     0x3500, 0xffffffff, 0x184,
1213     0x3504, 0xffffffff, 0x15097f,
1214     0x3500, 0xffffffff, 0x19b,
1215     0x3504, 0xffffffff, 0xc0998,
1216     0x3500, 0xffffffff, 0x1a9,
1217     0x3504, 0xffffffff, 0x409a7,
1218     0x3500, 0xffffffff, 0x1af,
1219     0x3504, 0xffffffff, 0xcdc,
1220     0x3500, 0xffffffff, 0x1b1,
1221     0x3504, 0xffffffff, 0x800,
1222     0x3508, 0xffffffff, 0x6c9b2000,
1223     0x3510, 0xfc00, 0x2000,
1224     0x3544, 0xffffffff, 0xfc0,
1225     0x28d4, 0x00000100, 0x100
1226 };
1227 
1228 static void si_init_golden_registers(struct radeon_device *rdev)
1229 {
1230     switch (rdev->family) {
1231     case CHIP_TAHITI:
1232         radeon_program_register_sequence(rdev,
1233                          tahiti_golden_registers,
1234                          (const u32)ARRAY_SIZE(tahiti_golden_registers));
1235         radeon_program_register_sequence(rdev,
1236                          tahiti_golden_rlc_registers,
1237                          (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1238         radeon_program_register_sequence(rdev,
1239                          tahiti_mgcg_cgcg_init,
1240                          (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1241         radeon_program_register_sequence(rdev,
1242                          tahiti_golden_registers2,
1243                          (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1244         break;
1245     case CHIP_PITCAIRN:
1246         radeon_program_register_sequence(rdev,
1247                          pitcairn_golden_registers,
1248                          (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1249         radeon_program_register_sequence(rdev,
1250                          pitcairn_golden_rlc_registers,
1251                          (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1252         radeon_program_register_sequence(rdev,
1253                          pitcairn_mgcg_cgcg_init,
1254                          (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1255         break;
1256     case CHIP_VERDE:
1257         radeon_program_register_sequence(rdev,
1258                          verde_golden_registers,
1259                          (const u32)ARRAY_SIZE(verde_golden_registers));
1260         radeon_program_register_sequence(rdev,
1261                          verde_golden_rlc_registers,
1262                          (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1263         radeon_program_register_sequence(rdev,
1264                          verde_mgcg_cgcg_init,
1265                          (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1266         radeon_program_register_sequence(rdev,
1267                          verde_pg_init,
1268                          (const u32)ARRAY_SIZE(verde_pg_init));
1269         break;
1270     case CHIP_OLAND:
1271         radeon_program_register_sequence(rdev,
1272                          oland_golden_registers,
1273                          (const u32)ARRAY_SIZE(oland_golden_registers));
1274         radeon_program_register_sequence(rdev,
1275                          oland_golden_rlc_registers,
1276                          (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1277         radeon_program_register_sequence(rdev,
1278                          oland_mgcg_cgcg_init,
1279                          (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1280         break;
1281     case CHIP_HAINAN:
1282         radeon_program_register_sequence(rdev,
1283                          hainan_golden_registers,
1284                          (const u32)ARRAY_SIZE(hainan_golden_registers));
1285         radeon_program_register_sequence(rdev,
1286                          hainan_golden_registers2,
1287                          (const u32)ARRAY_SIZE(hainan_golden_registers2));
1288         radeon_program_register_sequence(rdev,
1289                          hainan_mgcg_cgcg_init,
1290                          (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1291         break;
1292     default:
1293         break;
1294     }
1295 }
1296 
1297 /**
1298  * si_get_allowed_info_register - fetch the register for the info ioctl
1299  *
1300  * @rdev: radeon_device pointer
1301  * @reg: register offset in bytes
1302  * @val: register value
1303  *
1304  * Returns 0 for success or -EINVAL for an invalid register
1305  *
1306  */
1307 int si_get_allowed_info_register(struct radeon_device *rdev,
1308                  u32 reg, u32 *val)
1309 {
1310     switch (reg) {
1311     case GRBM_STATUS:
1312     case GRBM_STATUS2:
1313     case GRBM_STATUS_SE0:
1314     case GRBM_STATUS_SE1:
1315     case SRBM_STATUS:
1316     case SRBM_STATUS2:
1317     case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1318     case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1319     case UVD_STATUS:
1320         *val = RREG32(reg);
1321         return 0;
1322     default:
1323         return -EINVAL;
1324     }
1325 }
1326 
1327 #define PCIE_BUS_CLK                10000
1328 #define TCLK                        (PCIE_BUS_CLK / 10)
1329 
1330 /**
1331  * si_get_xclk - get the xclk
1332  *
1333  * @rdev: radeon_device pointer
1334  *
1335  * Returns the reference clock used by the gfx engine
1336  * (SI).
1337  */
1338 u32 si_get_xclk(struct radeon_device *rdev)
1339 {
1340     u32 reference_clock = rdev->clock.spll.reference_freq;
1341     u32 tmp;
1342 
1343     tmp = RREG32(CG_CLKPIN_CNTL_2);
1344     if (tmp & MUX_TCLK_TO_XCLK)
1345         return TCLK;
1346 
1347     tmp = RREG32(CG_CLKPIN_CNTL);
1348     if (tmp & XTALIN_DIVIDE)
1349         return reference_clock / 4;
1350 
1351     return reference_clock;
1352 }
1353 
1354 /* get temperature in millidegrees */
1355 int si_get_temp(struct radeon_device *rdev)
1356 {
1357     u32 temp;
1358     int actual_temp = 0;
1359 
1360     temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1361         CTF_TEMP_SHIFT;
1362 
1363     if (temp & 0x200)
1364         actual_temp = 255;
1365     else
1366         actual_temp = temp & 0x1ff;
1367 
1368     actual_temp = (actual_temp * 1000);
1369 
1370     return actual_temp;
1371 }
1372 
1373 #define TAHITI_IO_MC_REGS_SIZE 36
1374 
1375 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1376     {0x0000006f, 0x03044000},
1377     {0x00000070, 0x0480c018},
1378     {0x00000071, 0x00000040},
1379     {0x00000072, 0x01000000},
1380     {0x00000074, 0x000000ff},
1381     {0x00000075, 0x00143400},
1382     {0x00000076, 0x08ec0800},
1383     {0x00000077, 0x040000cc},
1384     {0x00000079, 0x00000000},
1385     {0x0000007a, 0x21000409},
1386     {0x0000007c, 0x00000000},
1387     {0x0000007d, 0xe8000000},
1388     {0x0000007e, 0x044408a8},
1389     {0x0000007f, 0x00000003},
1390     {0x00000080, 0x00000000},
1391     {0x00000081, 0x01000000},
1392     {0x00000082, 0x02000000},
1393     {0x00000083, 0x00000000},
1394     {0x00000084, 0xe3f3e4f4},
1395     {0x00000085, 0x00052024},
1396     {0x00000087, 0x00000000},
1397     {0x00000088, 0x66036603},
1398     {0x00000089, 0x01000000},
1399     {0x0000008b, 0x1c0a0000},
1400     {0x0000008c, 0xff010000},
1401     {0x0000008e, 0xffffefff},
1402     {0x0000008f, 0xfff3efff},
1403     {0x00000090, 0xfff3efbf},
1404     {0x00000094, 0x00101101},
1405     {0x00000095, 0x00000fff},
1406     {0x00000096, 0x00116fff},
1407     {0x00000097, 0x60010000},
1408     {0x00000098, 0x10010000},
1409     {0x00000099, 0x00006000},
1410     {0x0000009a, 0x00001000},
1411     {0x0000009f, 0x00a77400}
1412 };
1413 
1414 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1415     {0x0000006f, 0x03044000},
1416     {0x00000070, 0x0480c018},
1417     {0x00000071, 0x00000040},
1418     {0x00000072, 0x01000000},
1419     {0x00000074, 0x000000ff},
1420     {0x00000075, 0x00143400},
1421     {0x00000076, 0x08ec0800},
1422     {0x00000077, 0x040000cc},
1423     {0x00000079, 0x00000000},
1424     {0x0000007a, 0x21000409},
1425     {0x0000007c, 0x00000000},
1426     {0x0000007d, 0xe8000000},
1427     {0x0000007e, 0x044408a8},
1428     {0x0000007f, 0x00000003},
1429     {0x00000080, 0x00000000},
1430     {0x00000081, 0x01000000},
1431     {0x00000082, 0x02000000},
1432     {0x00000083, 0x00000000},
1433     {0x00000084, 0xe3f3e4f4},
1434     {0x00000085, 0x00052024},
1435     {0x00000087, 0x00000000},
1436     {0x00000088, 0x66036603},
1437     {0x00000089, 0x01000000},
1438     {0x0000008b, 0x1c0a0000},
1439     {0x0000008c, 0xff010000},
1440     {0x0000008e, 0xffffefff},
1441     {0x0000008f, 0xfff3efff},
1442     {0x00000090, 0xfff3efbf},
1443     {0x00000094, 0x00101101},
1444     {0x00000095, 0x00000fff},
1445     {0x00000096, 0x00116fff},
1446     {0x00000097, 0x60010000},
1447     {0x00000098, 0x10010000},
1448     {0x00000099, 0x00006000},
1449     {0x0000009a, 0x00001000},
1450     {0x0000009f, 0x00a47400}
1451 };
1452 
1453 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1454     {0x0000006f, 0x03044000},
1455     {0x00000070, 0x0480c018},
1456     {0x00000071, 0x00000040},
1457     {0x00000072, 0x01000000},
1458     {0x00000074, 0x000000ff},
1459     {0x00000075, 0x00143400},
1460     {0x00000076, 0x08ec0800},
1461     {0x00000077, 0x040000cc},
1462     {0x00000079, 0x00000000},
1463     {0x0000007a, 0x21000409},
1464     {0x0000007c, 0x00000000},
1465     {0x0000007d, 0xe8000000},
1466     {0x0000007e, 0x044408a8},
1467     {0x0000007f, 0x00000003},
1468     {0x00000080, 0x00000000},
1469     {0x00000081, 0x01000000},
1470     {0x00000082, 0x02000000},
1471     {0x00000083, 0x00000000},
1472     {0x00000084, 0xe3f3e4f4},
1473     {0x00000085, 0x00052024},
1474     {0x00000087, 0x00000000},
1475     {0x00000088, 0x66036603},
1476     {0x00000089, 0x01000000},
1477     {0x0000008b, 0x1c0a0000},
1478     {0x0000008c, 0xff010000},
1479     {0x0000008e, 0xffffefff},
1480     {0x0000008f, 0xfff3efff},
1481     {0x00000090, 0xfff3efbf},
1482     {0x00000094, 0x00101101},
1483     {0x00000095, 0x00000fff},
1484     {0x00000096, 0x00116fff},
1485     {0x00000097, 0x60010000},
1486     {0x00000098, 0x10010000},
1487     {0x00000099, 0x00006000},
1488     {0x0000009a, 0x00001000},
1489     {0x0000009f, 0x00a37400}
1490 };
1491 
1492 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1493     {0x0000006f, 0x03044000},
1494     {0x00000070, 0x0480c018},
1495     {0x00000071, 0x00000040},
1496     {0x00000072, 0x01000000},
1497     {0x00000074, 0x000000ff},
1498     {0x00000075, 0x00143400},
1499     {0x00000076, 0x08ec0800},
1500     {0x00000077, 0x040000cc},
1501     {0x00000079, 0x00000000},
1502     {0x0000007a, 0x21000409},
1503     {0x0000007c, 0x00000000},
1504     {0x0000007d, 0xe8000000},
1505     {0x0000007e, 0x044408a8},
1506     {0x0000007f, 0x00000003},
1507     {0x00000080, 0x00000000},
1508     {0x00000081, 0x01000000},
1509     {0x00000082, 0x02000000},
1510     {0x00000083, 0x00000000},
1511     {0x00000084, 0xe3f3e4f4},
1512     {0x00000085, 0x00052024},
1513     {0x00000087, 0x00000000},
1514     {0x00000088, 0x66036603},
1515     {0x00000089, 0x01000000},
1516     {0x0000008b, 0x1c0a0000},
1517     {0x0000008c, 0xff010000},
1518     {0x0000008e, 0xffffefff},
1519     {0x0000008f, 0xfff3efff},
1520     {0x00000090, 0xfff3efbf},
1521     {0x00000094, 0x00101101},
1522     {0x00000095, 0x00000fff},
1523     {0x00000096, 0x00116fff},
1524     {0x00000097, 0x60010000},
1525     {0x00000098, 0x10010000},
1526     {0x00000099, 0x00006000},
1527     {0x0000009a, 0x00001000},
1528     {0x0000009f, 0x00a17730}
1529 };
1530 
1531 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1532     {0x0000006f, 0x03044000},
1533     {0x00000070, 0x0480c018},
1534     {0x00000071, 0x00000040},
1535     {0x00000072, 0x01000000},
1536     {0x00000074, 0x000000ff},
1537     {0x00000075, 0x00143400},
1538     {0x00000076, 0x08ec0800},
1539     {0x00000077, 0x040000cc},
1540     {0x00000079, 0x00000000},
1541     {0x0000007a, 0x21000409},
1542     {0x0000007c, 0x00000000},
1543     {0x0000007d, 0xe8000000},
1544     {0x0000007e, 0x044408a8},
1545     {0x0000007f, 0x00000003},
1546     {0x00000080, 0x00000000},
1547     {0x00000081, 0x01000000},
1548     {0x00000082, 0x02000000},
1549     {0x00000083, 0x00000000},
1550     {0x00000084, 0xe3f3e4f4},
1551     {0x00000085, 0x00052024},
1552     {0x00000087, 0x00000000},
1553     {0x00000088, 0x66036603},
1554     {0x00000089, 0x01000000},
1555     {0x0000008b, 0x1c0a0000},
1556     {0x0000008c, 0xff010000},
1557     {0x0000008e, 0xffffefff},
1558     {0x0000008f, 0xfff3efff},
1559     {0x00000090, 0xfff3efbf},
1560     {0x00000094, 0x00101101},
1561     {0x00000095, 0x00000fff},
1562     {0x00000096, 0x00116fff},
1563     {0x00000097, 0x60010000},
1564     {0x00000098, 0x10010000},
1565     {0x00000099, 0x00006000},
1566     {0x0000009a, 0x00001000},
1567     {0x0000009f, 0x00a07730}
1568 };
1569 
1570 /* ucode loading */
1571 int si_mc_load_microcode(struct radeon_device *rdev)
1572 {
1573     const __be32 *fw_data = NULL;
1574     const __le32 *new_fw_data = NULL;
1575     u32 running;
1576     u32 *io_mc_regs = NULL;
1577     const __le32 *new_io_mc_regs = NULL;
1578     int i, regs_size, ucode_size;
1579 
1580     if (!rdev->mc_fw)
1581         return -EINVAL;
1582 
1583     if (rdev->new_fw) {
1584         const struct mc_firmware_header_v1_0 *hdr =
1585             (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1586 
1587         radeon_ucode_print_mc_hdr(&hdr->header);
1588         regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1589         new_io_mc_regs = (const __le32 *)
1590             (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1591         ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1592         new_fw_data = (const __le32 *)
1593             (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1594     } else {
1595         ucode_size = rdev->mc_fw->size / 4;
1596 
1597         switch (rdev->family) {
1598         case CHIP_TAHITI:
1599             io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1600             regs_size = TAHITI_IO_MC_REGS_SIZE;
1601             break;
1602         case CHIP_PITCAIRN:
1603             io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1604             regs_size = TAHITI_IO_MC_REGS_SIZE;
1605             break;
1606         case CHIP_VERDE:
1607         default:
1608             io_mc_regs = (u32 *)&verde_io_mc_regs;
1609             regs_size = TAHITI_IO_MC_REGS_SIZE;
1610             break;
1611         case CHIP_OLAND:
1612             io_mc_regs = (u32 *)&oland_io_mc_regs;
1613             regs_size = TAHITI_IO_MC_REGS_SIZE;
1614             break;
1615         case CHIP_HAINAN:
1616             io_mc_regs = (u32 *)&hainan_io_mc_regs;
1617             regs_size = TAHITI_IO_MC_REGS_SIZE;
1618             break;
1619         }
1620         fw_data = (const __be32 *)rdev->mc_fw->data;
1621     }
1622 
1623     running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1624 
1625     if (running == 0) {
1626         /* reset the engine and set to writable */
1627         WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1628         WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1629 
1630         /* load mc io regs */
1631         for (i = 0; i < regs_size; i++) {
1632             if (rdev->new_fw) {
1633                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1634                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1635             } else {
1636                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1637                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1638             }
1639         }
1640         /* load the MC ucode */
1641         for (i = 0; i < ucode_size; i++) {
1642             if (rdev->new_fw)
1643                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1644             else
1645                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1646         }
1647 
1648         /* put the engine back into the active state */
1649         WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1650         WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1651         WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1652 
1653         /* wait for training to complete */
1654         for (i = 0; i < rdev->usec_timeout; i++) {
1655             if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1656                 break;
1657             udelay(1);
1658         }
1659         for (i = 0; i < rdev->usec_timeout; i++) {
1660             if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1661                 break;
1662             udelay(1);
1663         }
1664     }
1665 
1666     return 0;
1667 }
1668 
1669 static int si_init_microcode(struct radeon_device *rdev)
1670 {
1671     const char *chip_name;
1672     const char *new_chip_name;
1673     size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1674     size_t smc_req_size, mc2_req_size;
1675     char fw_name[30];
1676     int err;
1677     int new_fw = 0;
1678     bool new_smc = false;
1679     bool si58_fw = false;
1680     bool banks2_fw = false;
1681 
1682     DRM_DEBUG("\n");
1683 
1684     switch (rdev->family) {
1685     case CHIP_TAHITI:
1686         chip_name = "TAHITI";
1687         new_chip_name = "tahiti";
1688         pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1689         me_req_size = SI_PM4_UCODE_SIZE * 4;
1690         ce_req_size = SI_CE_UCODE_SIZE * 4;
1691         rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1692         mc_req_size = SI_MC_UCODE_SIZE * 4;
1693         mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1694         smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1695         break;
1696     case CHIP_PITCAIRN:
1697         chip_name = "PITCAIRN";
1698         if ((rdev->pdev->revision == 0x81) &&
1699             ((rdev->pdev->device == 0x6810) ||
1700              (rdev->pdev->device == 0x6811)))
1701             new_smc = true;
1702         new_chip_name = "pitcairn";
1703         pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1704         me_req_size = SI_PM4_UCODE_SIZE * 4;
1705         ce_req_size = SI_CE_UCODE_SIZE * 4;
1706         rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1707         mc_req_size = SI_MC_UCODE_SIZE * 4;
1708         mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1709         smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1710         break;
1711     case CHIP_VERDE:
1712         chip_name = "VERDE";
1713         if (((rdev->pdev->device == 0x6820) &&
1714              ((rdev->pdev->revision == 0x81) ||
1715               (rdev->pdev->revision == 0x83))) ||
1716             ((rdev->pdev->device == 0x6821) &&
1717              ((rdev->pdev->revision == 0x83) ||
1718               (rdev->pdev->revision == 0x87))) ||
1719             ((rdev->pdev->revision == 0x87) &&
1720              ((rdev->pdev->device == 0x6823) ||
1721               (rdev->pdev->device == 0x682b))))
1722             new_smc = true;
1723         new_chip_name = "verde";
1724         pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1725         me_req_size = SI_PM4_UCODE_SIZE * 4;
1726         ce_req_size = SI_CE_UCODE_SIZE * 4;
1727         rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1728         mc_req_size = SI_MC_UCODE_SIZE * 4;
1729         mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1730         smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1731         break;
1732     case CHIP_OLAND:
1733         chip_name = "OLAND";
1734         if (((rdev->pdev->revision == 0x81) &&
1735              ((rdev->pdev->device == 0x6600) ||
1736               (rdev->pdev->device == 0x6604) ||
1737               (rdev->pdev->device == 0x6605) ||
1738               (rdev->pdev->device == 0x6610))) ||
1739             ((rdev->pdev->revision == 0x83) &&
1740              (rdev->pdev->device == 0x6610)))
1741             new_smc = true;
1742         new_chip_name = "oland";
1743         pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1744         me_req_size = SI_PM4_UCODE_SIZE * 4;
1745         ce_req_size = SI_CE_UCODE_SIZE * 4;
1746         rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1747         mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1748         smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1749         break;
1750     case CHIP_HAINAN:
1751         chip_name = "HAINAN";
1752         if (((rdev->pdev->revision == 0x81) &&
1753              (rdev->pdev->device == 0x6660)) ||
1754             ((rdev->pdev->revision == 0x83) &&
1755              ((rdev->pdev->device == 0x6660) ||
1756               (rdev->pdev->device == 0x6663) ||
1757               (rdev->pdev->device == 0x6665) ||
1758               (rdev->pdev->device == 0x6667))))
1759             new_smc = true;
1760         else if ((rdev->pdev->revision == 0xc3) &&
1761              (rdev->pdev->device == 0x6665))
1762             banks2_fw = true;
1763         new_chip_name = "hainan";
1764         pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1765         me_req_size = SI_PM4_UCODE_SIZE * 4;
1766         ce_req_size = SI_CE_UCODE_SIZE * 4;
1767         rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1768         mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1769         smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1770         break;
1771     default: BUG();
1772     }
1773 
1774     /* this memory configuration requires special firmware */
1775     if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
1776         si58_fw = true;
1777 
1778     DRM_INFO("Loading %s Microcode\n", new_chip_name);
1779 
1780     snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1781     err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1782     if (err) {
1783         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1784         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1785         if (err)
1786             goto out;
1787         if (rdev->pfp_fw->size != pfp_req_size) {
1788             pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1789                    rdev->pfp_fw->size, fw_name);
1790             err = -EINVAL;
1791             goto out;
1792         }
1793     } else {
1794         err = radeon_ucode_validate(rdev->pfp_fw);
1795         if (err) {
1796             pr_err("si_cp: validation failed for firmware \"%s\"\n",
1797                    fw_name);
1798             goto out;
1799         } else {
1800             new_fw++;
1801         }
1802     }
1803 
1804     snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1805     err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1806     if (err) {
1807         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1808         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1809         if (err)
1810             goto out;
1811         if (rdev->me_fw->size != me_req_size) {
1812             pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1813                    rdev->me_fw->size, fw_name);
1814             err = -EINVAL;
1815         }
1816     } else {
1817         err = radeon_ucode_validate(rdev->me_fw);
1818         if (err) {
1819             pr_err("si_cp: validation failed for firmware \"%s\"\n",
1820                    fw_name);
1821             goto out;
1822         } else {
1823             new_fw++;
1824         }
1825     }
1826 
1827     snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1828     err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1829     if (err) {
1830         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1831         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1832         if (err)
1833             goto out;
1834         if (rdev->ce_fw->size != ce_req_size) {
1835             pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1836                    rdev->ce_fw->size, fw_name);
1837             err = -EINVAL;
1838         }
1839     } else {
1840         err = radeon_ucode_validate(rdev->ce_fw);
1841         if (err) {
1842             pr_err("si_cp: validation failed for firmware \"%s\"\n",
1843                    fw_name);
1844             goto out;
1845         } else {
1846             new_fw++;
1847         }
1848     }
1849 
1850     snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1851     err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1852     if (err) {
1853         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1854         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1855         if (err)
1856             goto out;
1857         if (rdev->rlc_fw->size != rlc_req_size) {
1858             pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n",
1859                    rdev->rlc_fw->size, fw_name);
1860             err = -EINVAL;
1861         }
1862     } else {
1863         err = radeon_ucode_validate(rdev->rlc_fw);
1864         if (err) {
1865             pr_err("si_cp: validation failed for firmware \"%s\"\n",
1866                    fw_name);
1867             goto out;
1868         } else {
1869             new_fw++;
1870         }
1871     }
1872 
1873     if (si58_fw)
1874         snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
1875     else
1876         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1877     err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1878     if (err) {
1879         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1880         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1881         if (err) {
1882             snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1883             err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1884             if (err)
1885                 goto out;
1886         }
1887         if ((rdev->mc_fw->size != mc_req_size) &&
1888             (rdev->mc_fw->size != mc2_req_size)) {
1889             pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n",
1890                    rdev->mc_fw->size, fw_name);
1891             err = -EINVAL;
1892         }
1893         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1894     } else {
1895         err = radeon_ucode_validate(rdev->mc_fw);
1896         if (err) {
1897             pr_err("si_cp: validation failed for firmware \"%s\"\n",
1898                    fw_name);
1899             goto out;
1900         } else {
1901             new_fw++;
1902         }
1903     }
1904 
1905     if (banks2_fw)
1906         snprintf(fw_name, sizeof(fw_name), "radeon/banks_k_2_smc.bin");
1907     else if (new_smc)
1908         snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1909     else
1910         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1911     err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1912     if (err) {
1913         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1914         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1915         if (err) {
1916             pr_err("smc: error loading firmware \"%s\"\n", fw_name);
1917             release_firmware(rdev->smc_fw);
1918             rdev->smc_fw = NULL;
1919             err = 0;
1920         } else if (rdev->smc_fw->size != smc_req_size) {
1921             pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n",
1922                    rdev->smc_fw->size, fw_name);
1923             err = -EINVAL;
1924         }
1925     } else {
1926         err = radeon_ucode_validate(rdev->smc_fw);
1927         if (err) {
1928             pr_err("si_cp: validation failed for firmware \"%s\"\n",
1929                    fw_name);
1930             goto out;
1931         } else {
1932             new_fw++;
1933         }
1934     }
1935 
1936     if (new_fw == 0) {
1937         rdev->new_fw = false;
1938     } else if (new_fw < 6) {
1939         pr_err("si_fw: mixing new and old firmware!\n");
1940         err = -EINVAL;
1941     } else {
1942         rdev->new_fw = true;
1943     }
1944 out:
1945     if (err) {
1946         if (err != -EINVAL)
1947             pr_err("si_cp: Failed to load firmware \"%s\"\n",
1948                    fw_name);
1949         release_firmware(rdev->pfp_fw);
1950         rdev->pfp_fw = NULL;
1951         release_firmware(rdev->me_fw);
1952         rdev->me_fw = NULL;
1953         release_firmware(rdev->ce_fw);
1954         rdev->ce_fw = NULL;
1955         release_firmware(rdev->rlc_fw);
1956         rdev->rlc_fw = NULL;
1957         release_firmware(rdev->mc_fw);
1958         rdev->mc_fw = NULL;
1959         release_firmware(rdev->smc_fw);
1960         rdev->smc_fw = NULL;
1961     }
1962     return err;
1963 }
1964 
1965 /* watermark setup */
1966 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1967                    struct radeon_crtc *radeon_crtc,
1968                    struct drm_display_mode *mode,
1969                    struct drm_display_mode *other_mode)
1970 {
1971     u32 tmp, buffer_alloc, i;
1972     u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1973     /*
1974      * Line Buffer Setup
1975      * There are 3 line buffers, each one shared by 2 display controllers.
1976      * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1977      * the display controllers.  The paritioning is done via one of four
1978      * preset allocations specified in bits 21:20:
1979      *  0 - half lb
1980      *  2 - whole lb, other crtc must be disabled
1981      */
1982     /* this can get tricky if we have two large displays on a paired group
1983      * of crtcs.  Ideally for multiple large displays we'd assign them to
1984      * non-linked crtcs for maximum line buffer allocation.
1985      */
1986     if (radeon_crtc->base.enabled && mode) {
1987         if (other_mode) {
1988             tmp = 0; /* 1/2 */
1989             buffer_alloc = 1;
1990         } else {
1991             tmp = 2; /* whole */
1992             buffer_alloc = 2;
1993         }
1994     } else {
1995         tmp = 0;
1996         buffer_alloc = 0;
1997     }
1998 
1999     WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
2000            DC_LB_MEMORY_CONFIG(tmp));
2001 
2002     WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
2003            DMIF_BUFFERS_ALLOCATED(buffer_alloc));
2004     for (i = 0; i < rdev->usec_timeout; i++) {
2005         if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
2006             DMIF_BUFFERS_ALLOCATED_COMPLETED)
2007             break;
2008         udelay(1);
2009     }
2010 
2011     if (radeon_crtc->base.enabled && mode) {
2012         switch (tmp) {
2013         case 0:
2014         default:
2015             return 4096 * 2;
2016         case 2:
2017             return 8192 * 2;
2018         }
2019     }
2020 
2021     /* controller not enabled, so no lb used */
2022     return 0;
2023 }
2024 
2025 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2026 {
2027     u32 tmp = RREG32(MC_SHARED_CHMAP);
2028 
2029     switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2030     case 0:
2031     default:
2032         return 1;
2033     case 1:
2034         return 2;
2035     case 2:
2036         return 4;
2037     case 3:
2038         return 8;
2039     case 4:
2040         return 3;
2041     case 5:
2042         return 6;
2043     case 6:
2044         return 10;
2045     case 7:
2046         return 12;
2047     case 8:
2048         return 16;
2049     }
2050 }
2051 
2052 struct dce6_wm_params {
2053     u32 dram_channels; /* number of dram channels */
2054     u32 yclk;          /* bandwidth per dram data pin in kHz */
2055     u32 sclk;          /* engine clock in kHz */
2056     u32 disp_clk;      /* display clock in kHz */
2057     u32 src_width;     /* viewport width */
2058     u32 active_time;   /* active display time in ns */
2059     u32 blank_time;    /* blank time in ns */
2060     bool interlaced;    /* mode is interlaced */
2061     fixed20_12 vsc;    /* vertical scale ratio */
2062     u32 num_heads;     /* number of active crtcs */
2063     u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2064     u32 lb_size;       /* line buffer allocated to pipe */
2065     u32 vtaps;         /* vertical scaler taps */
2066 };
2067 
2068 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2069 {
2070     /* Calculate raw DRAM Bandwidth */
2071     fixed20_12 dram_efficiency; /* 0.7 */
2072     fixed20_12 yclk, dram_channels, bandwidth;
2073     fixed20_12 a;
2074 
2075     a.full = dfixed_const(1000);
2076     yclk.full = dfixed_const(wm->yclk);
2077     yclk.full = dfixed_div(yclk, a);
2078     dram_channels.full = dfixed_const(wm->dram_channels * 4);
2079     a.full = dfixed_const(10);
2080     dram_efficiency.full = dfixed_const(7);
2081     dram_efficiency.full = dfixed_div(dram_efficiency, a);
2082     bandwidth.full = dfixed_mul(dram_channels, yclk);
2083     bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2084 
2085     return dfixed_trunc(bandwidth);
2086 }
2087 
2088 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2089 {
2090     /* Calculate DRAM Bandwidth and the part allocated to display. */
2091     fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2092     fixed20_12 yclk, dram_channels, bandwidth;
2093     fixed20_12 a;
2094 
2095     a.full = dfixed_const(1000);
2096     yclk.full = dfixed_const(wm->yclk);
2097     yclk.full = dfixed_div(yclk, a);
2098     dram_channels.full = dfixed_const(wm->dram_channels * 4);
2099     a.full = dfixed_const(10);
2100     disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2101     disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2102     bandwidth.full = dfixed_mul(dram_channels, yclk);
2103     bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2104 
2105     return dfixed_trunc(bandwidth);
2106 }
2107 
2108 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2109 {
2110     /* Calculate the display Data return Bandwidth */
2111     fixed20_12 return_efficiency; /* 0.8 */
2112     fixed20_12 sclk, bandwidth;
2113     fixed20_12 a;
2114 
2115     a.full = dfixed_const(1000);
2116     sclk.full = dfixed_const(wm->sclk);
2117     sclk.full = dfixed_div(sclk, a);
2118     a.full = dfixed_const(10);
2119     return_efficiency.full = dfixed_const(8);
2120     return_efficiency.full = dfixed_div(return_efficiency, a);
2121     a.full = dfixed_const(32);
2122     bandwidth.full = dfixed_mul(a, sclk);
2123     bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2124 
2125     return dfixed_trunc(bandwidth);
2126 }
2127 
2128 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2129 {
2130     return 32;
2131 }
2132 
2133 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2134 {
2135     /* Calculate the DMIF Request Bandwidth */
2136     fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2137     fixed20_12 disp_clk, sclk, bandwidth;
2138     fixed20_12 a, b1, b2;
2139     u32 min_bandwidth;
2140 
2141     a.full = dfixed_const(1000);
2142     disp_clk.full = dfixed_const(wm->disp_clk);
2143     disp_clk.full = dfixed_div(disp_clk, a);
2144     a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2145     b1.full = dfixed_mul(a, disp_clk);
2146 
2147     a.full = dfixed_const(1000);
2148     sclk.full = dfixed_const(wm->sclk);
2149     sclk.full = dfixed_div(sclk, a);
2150     a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2151     b2.full = dfixed_mul(a, sclk);
2152 
2153     a.full = dfixed_const(10);
2154     disp_clk_request_efficiency.full = dfixed_const(8);
2155     disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2156 
2157     min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2158 
2159     a.full = dfixed_const(min_bandwidth);
2160     bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2161 
2162     return dfixed_trunc(bandwidth);
2163 }
2164 
2165 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2166 {
2167     /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2168     u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2169     u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2170     u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2171 
2172     return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2173 }
2174 
2175 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2176 {
2177     /* Calculate the display mode Average Bandwidth
2178      * DisplayMode should contain the source and destination dimensions,
2179      * timing, etc.
2180      */
2181     fixed20_12 bpp;
2182     fixed20_12 line_time;
2183     fixed20_12 src_width;
2184     fixed20_12 bandwidth;
2185     fixed20_12 a;
2186 
2187     a.full = dfixed_const(1000);
2188     line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2189     line_time.full = dfixed_div(line_time, a);
2190     bpp.full = dfixed_const(wm->bytes_per_pixel);
2191     src_width.full = dfixed_const(wm->src_width);
2192     bandwidth.full = dfixed_mul(src_width, bpp);
2193     bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2194     bandwidth.full = dfixed_div(bandwidth, line_time);
2195 
2196     return dfixed_trunc(bandwidth);
2197 }
2198 
2199 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2200 {
2201     /* First calcualte the latency in ns */
2202     u32 mc_latency = 2000; /* 2000 ns. */
2203     u32 available_bandwidth = dce6_available_bandwidth(wm);
2204     u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2205     u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2206     u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2207     u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2208         (wm->num_heads * cursor_line_pair_return_time);
2209     u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2210     u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2211     u32 tmp, dmif_size = 12288;
2212     fixed20_12 a, b, c;
2213 
2214     if (wm->num_heads == 0)
2215         return 0;
2216 
2217     a.full = dfixed_const(2);
2218     b.full = dfixed_const(1);
2219     if ((wm->vsc.full > a.full) ||
2220         ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2221         (wm->vtaps >= 5) ||
2222         ((wm->vsc.full >= a.full) && wm->interlaced))
2223         max_src_lines_per_dst_line = 4;
2224     else
2225         max_src_lines_per_dst_line = 2;
2226 
2227     a.full = dfixed_const(available_bandwidth);
2228     b.full = dfixed_const(wm->num_heads);
2229     a.full = dfixed_div(a, b);
2230     tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
2231     tmp = min(dfixed_trunc(a), tmp);
2232 
2233     lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
2234 
2235     a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2236     b.full = dfixed_const(1000);
2237     c.full = dfixed_const(lb_fill_bw);
2238     b.full = dfixed_div(c, b);
2239     a.full = dfixed_div(a, b);
2240     line_fill_time = dfixed_trunc(a);
2241 
2242     if (line_fill_time < wm->active_time)
2243         return latency;
2244     else
2245         return latency + (line_fill_time - wm->active_time);
2246 
2247 }
2248 
2249 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2250 {
2251     if (dce6_average_bandwidth(wm) <=
2252         (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2253         return true;
2254     else
2255         return false;
2256 };
2257 
2258 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2259 {
2260     if (dce6_average_bandwidth(wm) <=
2261         (dce6_available_bandwidth(wm) / wm->num_heads))
2262         return true;
2263     else
2264         return false;
2265 };
2266 
2267 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2268 {
2269     u32 lb_partitions = wm->lb_size / wm->src_width;
2270     u32 line_time = wm->active_time + wm->blank_time;
2271     u32 latency_tolerant_lines;
2272     u32 latency_hiding;
2273     fixed20_12 a;
2274 
2275     a.full = dfixed_const(1);
2276     if (wm->vsc.full > a.full)
2277         latency_tolerant_lines = 1;
2278     else {
2279         if (lb_partitions <= (wm->vtaps + 1))
2280             latency_tolerant_lines = 1;
2281         else
2282             latency_tolerant_lines = 2;
2283     }
2284 
2285     latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2286 
2287     if (dce6_latency_watermark(wm) <= latency_hiding)
2288         return true;
2289     else
2290         return false;
2291 }
2292 
2293 static void dce6_program_watermarks(struct radeon_device *rdev,
2294                      struct radeon_crtc *radeon_crtc,
2295                      u32 lb_size, u32 num_heads)
2296 {
2297     struct drm_display_mode *mode = &radeon_crtc->base.mode;
2298     struct dce6_wm_params wm_low, wm_high;
2299     u32 dram_channels;
2300     u32 active_time;
2301     u32 line_time = 0;
2302     u32 latency_watermark_a = 0, latency_watermark_b = 0;
2303     u32 priority_a_mark = 0, priority_b_mark = 0;
2304     u32 priority_a_cnt = PRIORITY_OFF;
2305     u32 priority_b_cnt = PRIORITY_OFF;
2306     u32 tmp, arb_control3;
2307     fixed20_12 a, b, c;
2308 
2309     if (radeon_crtc->base.enabled && num_heads && mode) {
2310         active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
2311                         (u32)mode->clock);
2312         line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
2313                       (u32)mode->clock);
2314         line_time = min(line_time, (u32)65535);
2315         priority_a_cnt = 0;
2316         priority_b_cnt = 0;
2317 
2318         if (rdev->family == CHIP_ARUBA)
2319             dram_channels = evergreen_get_number_of_dram_channels(rdev);
2320         else
2321             dram_channels = si_get_number_of_dram_channels(rdev);
2322 
2323         /* watermark for high clocks */
2324         if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2325             wm_high.yclk =
2326                 radeon_dpm_get_mclk(rdev, false) * 10;
2327             wm_high.sclk =
2328                 radeon_dpm_get_sclk(rdev, false) * 10;
2329         } else {
2330             wm_high.yclk = rdev->pm.current_mclk * 10;
2331             wm_high.sclk = rdev->pm.current_sclk * 10;
2332         }
2333 
2334         wm_high.disp_clk = mode->clock;
2335         wm_high.src_width = mode->crtc_hdisplay;
2336         wm_high.active_time = active_time;
2337         wm_high.blank_time = line_time - wm_high.active_time;
2338         wm_high.interlaced = false;
2339         if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2340             wm_high.interlaced = true;
2341         wm_high.vsc = radeon_crtc->vsc;
2342         wm_high.vtaps = 1;
2343         if (radeon_crtc->rmx_type != RMX_OFF)
2344             wm_high.vtaps = 2;
2345         wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2346         wm_high.lb_size = lb_size;
2347         wm_high.dram_channels = dram_channels;
2348         wm_high.num_heads = num_heads;
2349 
2350         /* watermark for low clocks */
2351         if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2352             wm_low.yclk =
2353                 radeon_dpm_get_mclk(rdev, true) * 10;
2354             wm_low.sclk =
2355                 radeon_dpm_get_sclk(rdev, true) * 10;
2356         } else {
2357             wm_low.yclk = rdev->pm.current_mclk * 10;
2358             wm_low.sclk = rdev->pm.current_sclk * 10;
2359         }
2360 
2361         wm_low.disp_clk = mode->clock;
2362         wm_low.src_width = mode->crtc_hdisplay;
2363         wm_low.active_time = active_time;
2364         wm_low.blank_time = line_time - wm_low.active_time;
2365         wm_low.interlaced = false;
2366         if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2367             wm_low.interlaced = true;
2368         wm_low.vsc = radeon_crtc->vsc;
2369         wm_low.vtaps = 1;
2370         if (radeon_crtc->rmx_type != RMX_OFF)
2371             wm_low.vtaps = 2;
2372         wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2373         wm_low.lb_size = lb_size;
2374         wm_low.dram_channels = dram_channels;
2375         wm_low.num_heads = num_heads;
2376 
2377         /* set for high clocks */
2378         latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2379         /* set for low clocks */
2380         latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2381 
2382         /* possibly force display priority to high */
2383         /* should really do this at mode validation time... */
2384         if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2385             !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2386             !dce6_check_latency_hiding(&wm_high) ||
2387             (rdev->disp_priority == 2)) {
2388             DRM_DEBUG_KMS("force priority to high\n");
2389             priority_a_cnt |= PRIORITY_ALWAYS_ON;
2390             priority_b_cnt |= PRIORITY_ALWAYS_ON;
2391         }
2392         if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2393             !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2394             !dce6_check_latency_hiding(&wm_low) ||
2395             (rdev->disp_priority == 2)) {
2396             DRM_DEBUG_KMS("force priority to high\n");
2397             priority_a_cnt |= PRIORITY_ALWAYS_ON;
2398             priority_b_cnt |= PRIORITY_ALWAYS_ON;
2399         }
2400 
2401         a.full = dfixed_const(1000);
2402         b.full = dfixed_const(mode->clock);
2403         b.full = dfixed_div(b, a);
2404         c.full = dfixed_const(latency_watermark_a);
2405         c.full = dfixed_mul(c, b);
2406         c.full = dfixed_mul(c, radeon_crtc->hsc);
2407         c.full = dfixed_div(c, a);
2408         a.full = dfixed_const(16);
2409         c.full = dfixed_div(c, a);
2410         priority_a_mark = dfixed_trunc(c);
2411         priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2412 
2413         a.full = dfixed_const(1000);
2414         b.full = dfixed_const(mode->clock);
2415         b.full = dfixed_div(b, a);
2416         c.full = dfixed_const(latency_watermark_b);
2417         c.full = dfixed_mul(c, b);
2418         c.full = dfixed_mul(c, radeon_crtc->hsc);
2419         c.full = dfixed_div(c, a);
2420         a.full = dfixed_const(16);
2421         c.full = dfixed_div(c, a);
2422         priority_b_mark = dfixed_trunc(c);
2423         priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2424 
2425         /* Save number of lines the linebuffer leads before the scanout */
2426         radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2427     }
2428 
2429     /* select wm A */
2430     arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2431     tmp = arb_control3;
2432     tmp &= ~LATENCY_WATERMARK_MASK(3);
2433     tmp |= LATENCY_WATERMARK_MASK(1);
2434     WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2435     WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2436            (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2437         LATENCY_HIGH_WATERMARK(line_time)));
2438     /* select wm B */
2439     tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2440     tmp &= ~LATENCY_WATERMARK_MASK(3);
2441     tmp |= LATENCY_WATERMARK_MASK(2);
2442     WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2443     WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2444            (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2445         LATENCY_HIGH_WATERMARK(line_time)));
2446     /* restore original selection */
2447     WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2448 
2449     /* write the priority marks */
2450     WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2451     WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2452 
2453     /* save values for DPM */
2454     radeon_crtc->line_time = line_time;
2455     radeon_crtc->wm_high = latency_watermark_a;
2456     radeon_crtc->wm_low = latency_watermark_b;
2457 }
2458 
2459 void dce6_bandwidth_update(struct radeon_device *rdev)
2460 {
2461     struct drm_display_mode *mode0 = NULL;
2462     struct drm_display_mode *mode1 = NULL;
2463     u32 num_heads = 0, lb_size;
2464     int i;
2465 
2466     if (!rdev->mode_info.mode_config_initialized)
2467         return;
2468 
2469     radeon_update_display_priority(rdev);
2470 
2471     for (i = 0; i < rdev->num_crtc; i++) {
2472         if (rdev->mode_info.crtcs[i]->base.enabled)
2473             num_heads++;
2474     }
2475     for (i = 0; i < rdev->num_crtc; i += 2) {
2476         mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2477         mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2478         lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2479         dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2480         lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2481         dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2482     }
2483 }
2484 
2485 /*
2486  * Core functions
2487  */
/*
 * si_tiling_mode_table_init - fill and program the SI tile mode table.
 *
 * Picks the TILE_SPLIT value that corresponds to
 * rdev->config.si.mem_row_size_in_kb (1 -> 1KB, 4 -> 4KB, anything
 * else -> 2KB), clears rdev->config.si.tile_mode_array, fills in the
 * entries for the detected chip family and then writes every array entry
 * to the registers starting at GB_TILE_MODE0 (one register every 4 bytes).
 *
 * Entries that are not assigned below (for example 18-20) keep the value 0
 * and are written as such.  For an unrecognised family only an error is
 * logged: no register is written and the cached array stays zeroed.
 */
2488 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2489 {
2490     u32 *tile = rdev->config.si.tile_mode_array;
2491     const u32 num_tile_mode_states =
2492             ARRAY_SIZE(rdev->config.si.tile_mode_array);
2493     u32 reg_offset, split_equal_to_row_size;
2494 
         /* Tile split used by the entries that must match the memory row size. */
2495     switch (rdev->config.si.mem_row_size_in_kb) {
2496     case 1:
2497         split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2498         break;
2499     case 2:
2500     default:
2501         split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2502         break;
2503     case 4:
2504         split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2505         break;
2506     }
2507 
         /* Start from an all-zero table so unassigned indices are well defined. */
2508     for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2509         tile[reg_offset] = 0;
2510 
         /* Per-family tables; every entry in the first case uses the P8_32x32_8x16 pipe config. */
2511     switch(rdev->family) {
2512     case CHIP_TAHITI:
2513     case CHIP_PITCAIRN:
2514         /* non-AA compressed depth or any compressed stencil */
2515         tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516                MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2517                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2518                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2519                NUM_BANKS(ADDR_SURF_16_BANK) |
2520                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2523         /* 2xAA/4xAA compressed depth only */
2524         tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525                MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2526                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2527                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2528                NUM_BANKS(ADDR_SURF_16_BANK) |
2529                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2531                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2532         /* 8xAA compressed depth only */
2533         tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534                MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2535                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2536                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2537                NUM_BANKS(ADDR_SURF_16_BANK) |
2538                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2541         /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2542         tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2543                MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2544                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2545                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2546                NUM_BANKS(ADDR_SURF_16_BANK) |
2547                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2549                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2550         /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2551         tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2552                MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2553                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2554                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2555                NUM_BANKS(ADDR_SURF_16_BANK) |
2556                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2558                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2559         /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2560         tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2561                MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2562                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2563                TILE_SPLIT(split_equal_to_row_size) |
2564                NUM_BANKS(ADDR_SURF_16_BANK) |
2565                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2567                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2568         /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2569         tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2570                MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2571                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2572                TILE_SPLIT(split_equal_to_row_size) |
2573                NUM_BANKS(ADDR_SURF_16_BANK) |
2574                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2575                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2576                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2577         /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2578         tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579                MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2580                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2581                TILE_SPLIT(split_equal_to_row_size) |
2582                NUM_BANKS(ADDR_SURF_16_BANK) |
2583                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2585                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2586         /* 1D and 1D Array Surfaces */
2587         tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2588                MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2589                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2590                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2591                NUM_BANKS(ADDR_SURF_16_BANK) |
2592                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2594                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2595         /* Displayable maps. */
2596         tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2597                MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2598                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2599                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2600                NUM_BANKS(ADDR_SURF_16_BANK) |
2601                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2602                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2603                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2604         /* Display 8bpp. */
2605         tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606                MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2607                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2608                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2609                NUM_BANKS(ADDR_SURF_16_BANK) |
2610                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2612                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2613         /* Display 16bpp. */
2614         tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2615                MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2616                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2617                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2618                NUM_BANKS(ADDR_SURF_16_BANK) |
2619                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2621                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2622         /* Display 32bpp. */
2623         tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2624                MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2625                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2626                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2627                NUM_BANKS(ADDR_SURF_16_BANK) |
2628                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2630                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2631         /* Thin. */
2632         tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2633                MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2634                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2635                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2636                NUM_BANKS(ADDR_SURF_16_BANK) |
2637                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2639                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2640         /* Thin 8 bpp. */
2641         tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642                MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2643                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2644                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2645                NUM_BANKS(ADDR_SURF_16_BANK) |
2646                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2648                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2649         /* Thin 16 bpp. */
2650         tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2651                MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2652                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2653                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2654                NUM_BANKS(ADDR_SURF_16_BANK) |
2655                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2657                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2658         /* Thin 32 bpp. */
2659         tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2660                MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2661                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2662                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2663                NUM_BANKS(ADDR_SURF_16_BANK) |
2664                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2665                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2666                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2667         /* Thin 64 bpp. */
2668         tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669                MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2670                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2671                TILE_SPLIT(split_equal_to_row_size) |
2672                NUM_BANKS(ADDR_SURF_16_BANK) |
2673                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2674                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2675                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2676         /* 8 bpp PRT. */
2677         tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678                MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2679                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2680                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2681                NUM_BANKS(ADDR_SURF_16_BANK) |
2682                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2683                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2684                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2685         /* 16 bpp PRT */
2686         tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2687                MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2688                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2689                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2690                NUM_BANKS(ADDR_SURF_16_BANK) |
2691                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2693                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2694         /* 32 bpp PRT */
2695         tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696                MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2697                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2698                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2699                NUM_BANKS(ADDR_SURF_16_BANK) |
2700                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2701                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2702                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2703         /* 64 bpp PRT */
2704         tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705                MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2706                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2707                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2708                NUM_BANKS(ADDR_SURF_16_BANK) |
2709                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2711                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2712         /* 128 bpp PRT */
2713         tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2714                MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2715                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2716                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2717                NUM_BANKS(ADDR_SURF_8_BANK) |
2718                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2719                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2720                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2721 
             /* Program the whole table, including the entries left at 0. */
2722         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2723             WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2724         break;
2725 
2726     case CHIP_VERDE:
2727     case CHIP_OLAND:
2728     case CHIP_HAINAN:
             /* Entries 0-17 below use the P4_8x16 pipe config. */
2729         /* non-AA compressed depth or any compressed stencil */
2730         tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2731                MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2732                PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2733                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2734                NUM_BANKS(ADDR_SURF_16_BANK) |
2735                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2736                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2737                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2738         /* 2xAA/4xAA compressed depth only */
2739         tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2740                MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2741                PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2742                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2743                NUM_BANKS(ADDR_SURF_16_BANK) |
2744                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2746                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2747         /* 8xAA compressed depth only */
2748         tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749                MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2750                PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2751                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2752                NUM_BANKS(ADDR_SURF_16_BANK) |
2753                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2754                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2755                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2756         /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2757         tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2758                MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2759                PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2760                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2761                NUM_BANKS(ADDR_SURF_16_BANK) |
2762                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2764                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2765         /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2766         tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2767                MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2768                PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2769                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2770                NUM_BANKS(ADDR_SURF_16_BANK) |
2771                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2773                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2774         /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2775         tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776                MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2777                PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778                TILE_SPLIT(split_equal_to_row_size) |
2779                NUM_BANKS(ADDR_SURF_16_BANK) |
2780                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2782                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2783         /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2784         tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785                MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2786                PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787                TILE_SPLIT(split_equal_to_row_size) |
2788                NUM_BANKS(ADDR_SURF_16_BANK) |
2789                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2791                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2792         /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2793         tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2794                MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2795                PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2796                TILE_SPLIT(split_equal_to_row_size) |
2797                NUM_BANKS(ADDR_SURF_16_BANK) |
2798                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2800                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2801         /* 1D and 1D Array Surfaces */
2802         tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2803                MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2804                PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2805                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2806                NUM_BANKS(ADDR_SURF_16_BANK) |
2807                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2809                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2810         /* Displayable maps. */
2811         tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2812                MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2813                PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2815                NUM_BANKS(ADDR_SURF_16_BANK) |
2816                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2818                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2819         /* Display 8bpp. */
2820         tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2821                MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2822                PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2823                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2824                NUM_BANKS(ADDR_SURF_16_BANK) |
2825                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2827                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2828         /* Display 16bpp. */
2829         tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2830                MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2831                PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2832                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2833                NUM_BANKS(ADDR_SURF_16_BANK) |
2834                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2835                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2836                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2837         /* Display 32bpp. */
2838         tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2839                MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2840                PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2841                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2842                NUM_BANKS(ADDR_SURF_16_BANK) |
2843                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2844                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2845                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2846         /* Thin. */
2847         tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2848                MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2849                PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2850                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2851                NUM_BANKS(ADDR_SURF_16_BANK) |
2852                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2854                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2855         /* Thin 8 bpp. */
2856         tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2857                MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2858                PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2859                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2860                NUM_BANKS(ADDR_SURF_16_BANK) |
2861                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2863                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2864         /* Thin 16 bpp. */
2865         tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2866                MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2867                PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2868                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2869                NUM_BANKS(ADDR_SURF_16_BANK) |
2870                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2871                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2872                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2873         /* Thin 32 bpp. */
2874         tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2875                MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2876                PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2877                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2878                NUM_BANKS(ADDR_SURF_16_BANK) |
2879                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2880                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2881                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2882         /* Thin 64 bpp. */
2883         tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2884                MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2885                PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2886                TILE_SPLIT(split_equal_to_row_size) |
2887                NUM_BANKS(ADDR_SURF_16_BANK) |
2888                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2889                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2890                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
             /*
              * NOTE(review): the PRT entries 21-25 below use
              * ADDR_SURF_P8_32x32_8x16 although the rest of this case uses
              * ADDR_SURF_P4_8x16 - confirm against the hardware documentation
              * that this is intended.
              */
2891         /* 8 bpp PRT. */
2892         tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893                MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2894                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2895                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2896                NUM_BANKS(ADDR_SURF_16_BANK) |
2897                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2898                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2899                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2900         /* 16 bpp PRT */
2901         tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902                MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2903                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2904                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2905                NUM_BANKS(ADDR_SURF_16_BANK) |
2906                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2907                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2908                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2909         /* 32 bpp PRT */
2910         tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911                MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2912                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2913                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2914                NUM_BANKS(ADDR_SURF_16_BANK) |
2915                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2916                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2917                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2918         /* 64 bpp PRT */
2919         tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920                MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2921                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2922                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2923                NUM_BANKS(ADDR_SURF_16_BANK) |
2924                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2925                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2926                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2927         /* 128 bpp PRT */
2928         tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2929                MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2930                PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2931                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2932                NUM_BANKS(ADDR_SURF_8_BANK) |
2933                BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2934                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2935                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2936 
             /* Program the whole table, including the entries left at 0. */
2937         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2938             WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2939         break;
2940 
2941     default:
             /* Unknown family: report it and leave the registers untouched. */
2942         DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2943     }
2944 }
2945 
2946 static void si_select_se_sh(struct radeon_device *rdev,
2947                 u32 se_num, u32 sh_num)
2948 {
2949     u32 data = INSTANCE_BROADCAST_WRITES;
2950 
2951     if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2952         data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2953     else if (se_num == 0xffffffff)
2954         data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2955     else if (sh_num == 0xffffffff)
2956         data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2957     else
2958         data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2959     WREG32(GRBM_GFX_INDEX, data);
2960 }
2961 
/*
 * si_create_bitmask - build a mask with the low @bit_width bits set.
 * @bit_width: number of low-order bits to set
 *
 * Returns 0 for a width of 0 and saturates at 0xffffffff for widths of
 * 32 or more.
 */
static u32 si_create_bitmask(u32 bit_width)
{
	/* Shifting a 32-bit value by >= 32 is undefined, so saturate first. */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
2972 
2973 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2974 {
2975     u32 data, mask;
2976 
2977     data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2978     if (data & 1)
2979         data &= INACTIVE_CUS_MASK;
2980     else
2981         data = 0;
2982     data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2983 
2984     data >>= INACTIVE_CUS_SHIFT;
2985 
2986     mask = si_create_bitmask(cu_per_sh);
2987 
2988     return ~data & mask;
2989 }
2990 
2991 static void si_setup_spi(struct radeon_device *rdev,
2992              u32 se_num, u32 sh_per_se,
2993              u32 cu_per_sh)
2994 {
2995     int i, j, k;
2996     u32 data, mask, active_cu;
2997 
2998     for (i = 0; i < se_num; i++) {
2999         for (j = 0; j < sh_per_se; j++) {
3000             si_select_se_sh(rdev, i, j);
3001             data = RREG32(SPI_STATIC_THREAD_MGMT_3);
3002             active_cu = si_get_cu_enabled(rdev, cu_per_sh);
3003 
3004             mask = 1;
3005             for (k = 0; k < 16; k++) {
3006                 mask <<= k;
3007                 if (active_cu & mask) {
3008                     data &= ~mask;
3009                     WREG32(SPI_STATIC_THREAD_MGMT_3, data);
3010                     break;
3011                 }
3012             }
3013         }
3014     }
3015     si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3016 }
3017 
3018 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3019                   u32 max_rb_num_per_se,
3020                   u32 sh_per_se)
3021 {
3022     u32 data, mask;
3023 
3024     data = RREG32(CC_RB_BACKEND_DISABLE);
3025     if (data & 1)
3026         data &= BACKEND_DISABLE_MASK;
3027     else
3028         data = 0;
3029     data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3030 
3031     data >>= BACKEND_DISABLE_SHIFT;
3032 
3033     mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3034 
3035     return data & mask;
3036 }
3037 
3038 static void si_setup_rb(struct radeon_device *rdev,
3039             u32 se_num, u32 sh_per_se,
3040             u32 max_rb_num_per_se)
3041 {
3042     int i, j;
3043     u32 data, mask;
3044     u32 disabled_rbs = 0;
3045     u32 enabled_rbs = 0;
3046 
3047     for (i = 0; i < se_num; i++) {
3048         for (j = 0; j < sh_per_se; j++) {
3049             si_select_se_sh(rdev, i, j);
3050             data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3051             disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3052         }
3053     }
3054     si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3055 
3056     mask = 1;
3057     for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3058         if (!(disabled_rbs & mask))
3059             enabled_rbs |= mask;
3060         mask <<= 1;
3061     }
3062 
3063     rdev->config.si.backend_enable_mask = enabled_rbs;
3064 
3065     for (i = 0; i < se_num; i++) {
3066         si_select_se_sh(rdev, i, 0xffffffff);
3067         data = 0;
3068         for (j = 0; j < sh_per_se; j++) {
3069             switch (enabled_rbs & 3) {
3070             case 1:
3071                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3072                 break;
3073             case 2:
3074                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3075                 break;
3076             case 3:
3077             default:
3078                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3079                 break;
3080             }
3081             enabled_rbs >>= 2;
3082         }
3083         WREG32(PA_SC_RASTER_CONFIG, data);
3084     }
3085     si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3086 }
3087 
3088 static void si_gpu_init(struct radeon_device *rdev)
3089 {
3090     u32 gb_addr_config = 0;
3091     u32 mc_arb_ramcfg;
3092     u32 sx_debug_1;
3093     u32 hdp_host_path_cntl;
3094     u32 tmp;
3095     int i, j;
3096 
3097     switch (rdev->family) {
3098     case CHIP_TAHITI:
3099         rdev->config.si.max_shader_engines = 2;
3100         rdev->config.si.max_tile_pipes = 12;
3101         rdev->config.si.max_cu_per_sh = 8;
3102         rdev->config.si.max_sh_per_se = 2;
3103         rdev->config.si.max_backends_per_se = 4;
3104         rdev->config.si.max_texture_channel_caches = 12;
3105         rdev->config.si.max_gprs = 256;
3106         rdev->config.si.max_gs_threads = 32;
3107         rdev->config.si.max_hw_contexts = 8;
3108 
3109         rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3110         rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3111         rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3112         rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3113         gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3114         break;
3115     case CHIP_PITCAIRN:
3116         rdev->config.si.max_shader_engines = 2;
3117         rdev->config.si.max_tile_pipes = 8;
3118         rdev->config.si.max_cu_per_sh = 5;
3119         rdev->config.si.max_sh_per_se = 2;
3120         rdev->config.si.max_backends_per_se = 4;
3121         rdev->config.si.max_texture_channel_caches = 8;
3122         rdev->config.si.max_gprs = 256;
3123         rdev->config.si.max_gs_threads = 32;
3124         rdev->config.si.max_hw_contexts = 8;
3125 
3126         rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3127         rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3128         rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3129         rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3130         gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3131         break;
3132     case CHIP_VERDE:
3133     default:
3134         rdev->config.si.max_shader_engines = 1;
3135         rdev->config.si.max_tile_pipes = 4;
3136         rdev->config.si.max_cu_per_sh = 5;
3137         rdev->config.si.max_sh_per_se = 2;
3138         rdev->config.si.max_backends_per_se = 4;
3139         rdev->config.si.max_texture_channel_caches = 4;
3140         rdev->config.si.max_gprs = 256;
3141         rdev->config.si.max_gs_threads = 32;
3142         rdev->config.si.max_hw_contexts = 8;
3143 
3144         rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3145         rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3146         rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3147         rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3148         gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3149         break;
3150     case CHIP_OLAND:
3151         rdev->config.si.max_shader_engines = 1;
3152         rdev->config.si.max_tile_pipes = 4;
3153         rdev->config.si.max_cu_per_sh = 6;
3154         rdev->config.si.max_sh_per_se = 1;
3155         rdev->config.si.max_backends_per_se = 2;
3156         rdev->config.si.max_texture_channel_caches = 4;
3157         rdev->config.si.max_gprs = 256;
3158         rdev->config.si.max_gs_threads = 16;
3159         rdev->config.si.max_hw_contexts = 8;
3160 
3161         rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3162         rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3163         rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3164         rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3165         gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3166         break;
3167     case CHIP_HAINAN:
3168         rdev->config.si.max_shader_engines = 1;
3169         rdev->config.si.max_tile_pipes = 4;
3170         rdev->config.si.max_cu_per_sh = 5;
3171         rdev->config.si.max_sh_per_se = 1;
3172         rdev->config.si.max_backends_per_se = 1;
3173         rdev->config.si.max_texture_channel_caches = 2;
3174         rdev->config.si.max_gprs = 256;
3175         rdev->config.si.max_gs_threads = 16;
3176         rdev->config.si.max_hw_contexts = 8;
3177 
3178         rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3179         rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3180         rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3181         rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3182         gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3183         break;
3184     }
3185 
3186     /* Initialize HDP */
3187     for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3188         WREG32((0x2c14 + j), 0x00000000);
3189         WREG32((0x2c18 + j), 0x00000000);
3190         WREG32((0x2c1c + j), 0x00000000);
3191         WREG32((0x2c20 + j), 0x00000000);
3192         WREG32((0x2c24 + j), 0x00000000);
3193     }
3194 
3195     WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3196     WREG32(SRBM_INT_CNTL, 1);
3197     WREG32(SRBM_INT_ACK, 1);
3198 
3199     evergreen_fix_pci_max_read_req_size(rdev);
3200 
3201     WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3202 
3203     RREG32(MC_SHARED_CHMAP);
3204     mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3205 
3206     rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3207     rdev->config.si.mem_max_burst_length_bytes = 256;
3208     tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3209     rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3210     if (rdev->config.si.mem_row_size_in_kb > 4)
3211         rdev->config.si.mem_row_size_in_kb = 4;
3212     /* XXX use MC settings? */
3213     rdev->config.si.shader_engine_tile_size = 32;
3214     rdev->config.si.num_gpus = 1;
3215     rdev->config.si.multi_gpu_tile_size = 64;
3216 
3217     /* fix up row size */
3218     gb_addr_config &= ~ROW_SIZE_MASK;
3219     switch (rdev->config.si.mem_row_size_in_kb) {
3220     case 1:
3221     default:
3222         gb_addr_config |= ROW_SIZE(0);
3223         break;
3224     case 2:
3225         gb_addr_config |= ROW_SIZE(1);
3226         break;
3227     case 4:
3228         gb_addr_config |= ROW_SIZE(2);
3229         break;
3230     }
3231 
3232     /* setup tiling info dword.  gb_addr_config is not adequate since it does
3233      * not have bank info, so create a custom tiling dword.
3234      * bits 3:0   num_pipes
3235      * bits 7:4   num_banks
3236      * bits 11:8  group_size
3237      * bits 15:12 row_size
3238      */
3239     rdev->config.si.tile_config = 0;
3240     switch (rdev->config.si.num_tile_pipes) {
3241     case 1:
3242         rdev->config.si.tile_config |= (0 << 0);
3243         break;
3244     case 2:
3245         rdev->config.si.tile_config |= (1 << 0);
3246         break;
3247     case 4:
3248         rdev->config.si.tile_config |= (2 << 0);
3249         break;
3250     case 8:
3251     default:
3252         /* XXX what about 12? */
3253         rdev->config.si.tile_config |= (3 << 0);
3254         break;
3255     }
3256     switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3257     case 0: /* four banks */
3258         rdev->config.si.tile_config |= 0 << 4;
3259         break;
3260     case 1: /* eight banks */
3261         rdev->config.si.tile_config |= 1 << 4;
3262         break;
3263     case 2: /* sixteen banks */
3264     default:
3265         rdev->config.si.tile_config |= 2 << 4;
3266         break;
3267     }
3268     rdev->config.si.tile_config |=
3269         ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3270     rdev->config.si.tile_config |=
3271         ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3272 
3273     WREG32(GB_ADDR_CONFIG, gb_addr_config);
3274     WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3275     WREG32(DMIF_ADDR_CALC, gb_addr_config);
3276     WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3277     WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3278     WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3279     if (rdev->has_uvd) {
3280         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3281         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3282         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3283     }
3284 
3285     si_tiling_mode_table_init(rdev);
3286 
3287     si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3288             rdev->config.si.max_sh_per_se,
3289             rdev->config.si.max_backends_per_se);
3290 
3291     si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3292              rdev->config.si.max_sh_per_se,
3293              rdev->config.si.max_cu_per_sh);
3294 
3295     rdev->config.si.active_cus = 0;
3296     for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3297         for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3298             rdev->config.si.active_cus +=
3299                 hweight32(si_get_cu_active_bitmap(rdev, i, j));
3300         }
3301     }
3302 
3303     /* set HW defaults for 3D engine */
3304     WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3305                      ROQ_IB2_START(0x2b)));
3306     WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3307 
3308     sx_debug_1 = RREG32(SX_DEBUG_1);
3309     WREG32(SX_DEBUG_1, sx_debug_1);
3310 
3311     WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3312 
3313     WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3314                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3315                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3316                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3317 
3318     WREG32(VGT_NUM_INSTANCES, 1);
3319 
3320     WREG32(CP_PERFMON_CNTL, 0);
3321 
3322     WREG32(SQ_CONFIG, 0);
3323 
3324     WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3325                       FORCE_EOV_MAX_REZ_CNT(255)));
3326 
3327     WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3328            AUTO_INVLD_EN(ES_AND_GS_AUTO));
3329 
3330     WREG32(VGT_GS_VERTEX_REUSE, 16);
3331     WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3332 
3333     WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3334     WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3335     WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3336     WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3337     WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3338     WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3339     WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3340     WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3341 
3342     tmp = RREG32(HDP_MISC_CNTL);
3343     tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3344     WREG32(HDP_MISC_CNTL, tmp);
3345 
3346     hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3347     WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3348 
3349     WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3350 
3351     udelay(50);
3352 }
3353 
3354 /*
3355  * GPU scratch register helper functions.
3356  */
3357 static void si_scratch_init(struct radeon_device *rdev)
3358 {
3359     int i;
3360 
3361     rdev->scratch.num_reg = 7;
3362     rdev->scratch.reg_base = SCRATCH_REG0;
3363     for (i = 0; i < rdev->scratch.num_reg; i++) {
3364         rdev->scratch.free[i] = true;
3365         rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3366     }
3367 }
3368 
3369 void si_fence_ring_emit(struct radeon_device *rdev,
3370             struct radeon_fence *fence)
3371 {
3372     struct radeon_ring *ring = &rdev->ring[fence->ring];
3373     u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3374 
3375     /* flush read cache over gart */
3376     radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3377     radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3378     radeon_ring_write(ring, 0);
3379     radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3380     radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3381               PACKET3_TC_ACTION_ENA |
3382               PACKET3_SH_KCACHE_ACTION_ENA |
3383               PACKET3_SH_ICACHE_ACTION_ENA);
3384     radeon_ring_write(ring, 0xFFFFFFFF);
3385     radeon_ring_write(ring, 0);
3386     radeon_ring_write(ring, 10); /* poll interval */
3387     /* EVENT_WRITE_EOP - flush caches, send int */
3388     radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3389     radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3390     radeon_ring_write(ring, lower_32_bits(addr));
3391     radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3392     radeon_ring_write(ring, fence->seq);
3393     radeon_ring_write(ring, 0);
3394 }
3395 
3396 /*
3397  * IB stuff
3398  */
3399 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3400 {
3401     struct radeon_ring *ring = &rdev->ring[ib->ring];
3402     unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3403     u32 header;
3404 
3405     if (ib->is_const_ib) {
3406         /* set switch buffer packet before const IB */
3407         radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3408         radeon_ring_write(ring, 0);
3409 
3410         header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3411     } else {
3412         u32 next_rptr;
3413         if (ring->rptr_save_reg) {
3414             next_rptr = ring->wptr + 3 + 4 + 8;
3415             radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3416             radeon_ring_write(ring, ((ring->rptr_save_reg -
3417                           PACKET3_SET_CONFIG_REG_START) >> 2));
3418             radeon_ring_write(ring, next_rptr);
3419         } else if (rdev->wb.enabled) {
3420             next_rptr = ring->wptr + 5 + 4 + 8;
3421             radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3422             radeon_ring_write(ring, (1 << 8));
3423             radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3424             radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3425             radeon_ring_write(ring, next_rptr);
3426         }
3427 
3428         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3429     }
3430 
3431     radeon_ring_write(ring, header);
3432     radeon_ring_write(ring,
3433 #ifdef __BIG_ENDIAN
3434               (2 << 0) |
3435 #endif
3436               (ib->gpu_addr & 0xFFFFFFFC));
3437     radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3438     radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3439 
3440     if (!ib->is_const_ib) {
3441         /* flush read cache over gart for this vmid */
3442         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3443         radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3444         radeon_ring_write(ring, vm_id);
3445         radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3446         radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3447                   PACKET3_TC_ACTION_ENA |
3448                   PACKET3_SH_KCACHE_ACTION_ENA |
3449                   PACKET3_SH_ICACHE_ACTION_ENA);
3450         radeon_ring_write(ring, 0xFFFFFFFF);
3451         radeon_ring_write(ring, 0);
3452         radeon_ring_write(ring, 10); /* poll interval */
3453     }
3454 }
3455 
3456 /*
3457  * CP.
3458  */
3459 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3460 {
3461     if (enable)
3462         WREG32(CP_ME_CNTL, 0);
3463     else {
3464         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3465             radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3466         WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3467         WREG32(SCRATCH_UMSK, 0);
3468         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3469         rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3470         rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3471     }
3472     udelay(50);
3473 }
3474 
3475 static int si_cp_load_microcode(struct radeon_device *rdev)
3476 {
3477     int i;
3478 
3479     if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3480         return -EINVAL;
3481 
3482     si_cp_enable(rdev, false);
3483 
3484     if (rdev->new_fw) {
3485         const struct gfx_firmware_header_v1_0 *pfp_hdr =
3486             (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3487         const struct gfx_firmware_header_v1_0 *ce_hdr =
3488             (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3489         const struct gfx_firmware_header_v1_0 *me_hdr =
3490             (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3491         const __le32 *fw_data;
3492         u32 fw_size;
3493 
3494         radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3495         radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3496         radeon_ucode_print_gfx_hdr(&me_hdr->header);
3497 
3498         /* PFP */
3499         fw_data = (const __le32 *)
3500             (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3501         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3502         WREG32(CP_PFP_UCODE_ADDR, 0);
3503         for (i = 0; i < fw_size; i++)
3504             WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3505         WREG32(CP_PFP_UCODE_ADDR, 0);
3506 
3507         /* CE */
3508         fw_data = (const __le32 *)
3509             (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3510         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3511         WREG32(CP_CE_UCODE_ADDR, 0);
3512         for (i = 0; i < fw_size; i++)
3513             WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3514         WREG32(CP_CE_UCODE_ADDR, 0);
3515 
3516         /* ME */
3517         fw_data = (const __be32 *)
3518             (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3519         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3520         WREG32(CP_ME_RAM_WADDR, 0);
3521         for (i = 0; i < fw_size; i++)
3522             WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3523         WREG32(CP_ME_RAM_WADDR, 0);
3524     } else {
3525         const __be32 *fw_data;
3526 
3527         /* PFP */
3528         fw_data = (const __be32 *)rdev->pfp_fw->data;
3529         WREG32(CP_PFP_UCODE_ADDR, 0);
3530         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3531             WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3532         WREG32(CP_PFP_UCODE_ADDR, 0);
3533 
3534         /* CE */
3535         fw_data = (const __be32 *)rdev->ce_fw->data;
3536         WREG32(CP_CE_UCODE_ADDR, 0);
3537         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3538             WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3539         WREG32(CP_CE_UCODE_ADDR, 0);
3540 
3541         /* ME */
3542         fw_data = (const __be32 *)rdev->me_fw->data;
3543         WREG32(CP_ME_RAM_WADDR, 0);
3544         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3545             WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3546         WREG32(CP_ME_RAM_WADDR, 0);
3547     }
3548 
3549     WREG32(CP_PFP_UCODE_ADDR, 0);
3550     WREG32(CP_CE_UCODE_ADDR, 0);
3551     WREG32(CP_ME_RAM_WADDR, 0);
3552     WREG32(CP_ME_RAM_RADDR, 0);
3553     return 0;
3554 }
3555 
3556 static int si_cp_start(struct radeon_device *rdev)
3557 {
3558     struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3559     int r, i;
3560 
3561     r = radeon_ring_lock(rdev, ring, 7 + 4);
3562     if (r) {
3563         DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3564         return r;
3565     }
3566     /* init the CP */
3567     radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3568     radeon_ring_write(ring, 0x1);
3569     radeon_ring_write(ring, 0x0);
3570     radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3571     radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3572     radeon_ring_write(ring, 0);
3573     radeon_ring_write(ring, 0);
3574 
3575     /* init the CE partitions */
3576     radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3577     radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3578     radeon_ring_write(ring, 0xc000);
3579     radeon_ring_write(ring, 0xe000);
3580     radeon_ring_unlock_commit(rdev, ring, false);
3581 
3582     si_cp_enable(rdev, true);
3583 
3584     r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3585     if (r) {
3586         DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3587         return r;
3588     }
3589 
3590     /* setup clear context state */
3591     radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3592     radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3593 
3594     for (i = 0; i < si_default_size; i++)
3595         radeon_ring_write(ring, si_default_state[i]);
3596 
3597     radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3598     radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3599 
3600     /* set clear context state */
3601     radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3602     radeon_ring_write(ring, 0);
3603 
3604     radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3605     radeon_ring_write(ring, 0x00000316);
3606     radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3607     radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3608 
3609     radeon_ring_unlock_commit(rdev, ring, false);
3610 
3611     for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3612         ring = &rdev->ring[i];
3613         r = radeon_ring_lock(rdev, ring, 2);
3614 
3615         /* clear the compute context state */
3616         radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3617         radeon_ring_write(ring, 0);
3618 
3619         radeon_ring_unlock_commit(rdev, ring, false);
3620     }
3621 
3622     return 0;
3623 }
3624 
3625 static void si_cp_fini(struct radeon_device *rdev)
3626 {
3627     struct radeon_ring *ring;
3628     si_cp_enable(rdev, false);
3629 
3630     ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3631     radeon_ring_fini(rdev, ring);
3632     radeon_scratch_free(rdev, ring->rptr_save_reg);
3633 
3634     ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3635     radeon_ring_fini(rdev, ring);
3636     radeon_scratch_free(rdev, ring->rptr_save_reg);
3637 
3638     ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3639     radeon_ring_fini(rdev, ring);
3640     radeon_scratch_free(rdev, ring->rptr_save_reg);
3641 }
3642 
3643 static int si_cp_resume(struct radeon_device *rdev)
3644 {
3645     struct radeon_ring *ring;
3646     u32 tmp;
3647     u32 rb_bufsz;
3648     int r;
3649 
3650     si_enable_gui_idle_interrupt(rdev, false);
3651 
3652     WREG32(CP_SEM_WAIT_TIMER, 0x0);
3653     WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3654 
3655     /* Set the write pointer delay */
3656     WREG32(CP_RB_WPTR_DELAY, 0);
3657 
3658     WREG32(CP_DEBUG, 0);
3659     WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3660 
3661     /* ring 0 - compute and gfx */
3662     /* Set ring buffer size */
3663     ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3664     rb_bufsz = order_base_2(ring->ring_size / 8);
3665     tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3666 #ifdef __BIG_ENDIAN
3667     tmp |= BUF_SWAP_32BIT;
3668 #endif
3669     WREG32(CP_RB0_CNTL, tmp);
3670 
3671     /* Initialize the ring buffer's read and write pointers */
3672     WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3673     ring->wptr = 0;
3674     WREG32(CP_RB0_WPTR, ring->wptr);
3675 
3676     /* set the wb address whether it's enabled or not */
3677     WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3678     WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3679 
3680     if (rdev->wb.enabled)
3681         WREG32(SCRATCH_UMSK, 0xff);
3682     else {
3683         tmp |= RB_NO_UPDATE;
3684         WREG32(SCRATCH_UMSK, 0);
3685     }
3686 
3687     mdelay(1);
3688     WREG32(CP_RB0_CNTL, tmp);
3689 
3690     WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3691 
3692     /* ring1  - compute only */
3693     /* Set ring buffer size */
3694     ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3695     rb_bufsz = order_base_2(ring->ring_size / 8);
3696     tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3697 #ifdef __BIG_ENDIAN
3698     tmp |= BUF_SWAP_32BIT;
3699 #endif
3700     WREG32(CP_RB1_CNTL, tmp);
3701 
3702     /* Initialize the ring buffer's read and write pointers */
3703     WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3704     ring->wptr = 0;
3705     WREG32(CP_RB1_WPTR, ring->wptr);
3706 
3707     /* set the wb address whether it's enabled or not */
3708     WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3709     WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3710 
3711     mdelay(1);
3712     WREG32(CP_RB1_CNTL, tmp);
3713 
3714     WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3715 
3716     /* ring2 - compute only */
3717     /* Set ring buffer size */
3718     ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3719     rb_bufsz = order_base_2(ring->ring_size / 8);
3720     tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3721 #ifdef __BIG_ENDIAN
3722     tmp |= BUF_SWAP_32BIT;
3723 #endif
3724     WREG32(CP_RB2_CNTL, tmp);
3725 
3726     /* Initialize the ring buffer's read and write pointers */
3727     WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3728     ring->wptr = 0;
3729     WREG32(CP_RB2_WPTR, ring->wptr);
3730 
3731     /* set the wb address whether it's enabled or not */
3732     WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3733     WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3734 
3735     mdelay(1);
3736     WREG32(CP_RB2_CNTL, tmp);
3737 
3738     WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3739 
3740     /* start the rings */
3741     si_cp_start(rdev);
3742     rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3743     rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3744     rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3745     r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3746     if (r) {
3747         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3748         rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3749         rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3750         return r;
3751     }
3752     r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3753     if (r) {
3754         rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3755     }
3756     r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3757     if (r) {
3758         rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3759     }
3760 
3761     si_enable_gui_idle_interrupt(rdev, true);
3762 
3763     if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3764         radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3765 
3766     return 0;
3767 }
3768 
3769 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3770 {
3771     u32 reset_mask = 0;
3772     u32 tmp;
3773 
3774     /* GRBM_STATUS */
3775     tmp = RREG32(GRBM_STATUS);
3776     if (tmp & (PA_BUSY | SC_BUSY |
3777            BCI_BUSY | SX_BUSY |
3778            TA_BUSY | VGT_BUSY |
3779            DB_BUSY | CB_BUSY |
3780            GDS_BUSY | SPI_BUSY |
3781            IA_BUSY | IA_BUSY_NO_DMA))
3782         reset_mask |= RADEON_RESET_GFX;
3783 
3784     if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3785            CP_BUSY | CP_COHERENCY_BUSY))
3786         reset_mask |= RADEON_RESET_CP;
3787 
3788     if (tmp & GRBM_EE_BUSY)
3789         reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3790 
3791     /* GRBM_STATUS2 */
3792     tmp = RREG32(GRBM_STATUS2);
3793     if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3794         reset_mask |= RADEON_RESET_RLC;
3795 
3796     /* DMA_STATUS_REG 0 */
3797     tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3798     if (!(tmp & DMA_IDLE))
3799         reset_mask |= RADEON_RESET_DMA;
3800 
3801     /* DMA_STATUS_REG 1 */
3802     tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3803     if (!(tmp & DMA_IDLE))
3804         reset_mask |= RADEON_RESET_DMA1;
3805 
3806     /* SRBM_STATUS2 */
3807     tmp = RREG32(SRBM_STATUS2);
3808     if (tmp & DMA_BUSY)
3809         reset_mask |= RADEON_RESET_DMA;
3810 
3811     if (tmp & DMA1_BUSY)
3812         reset_mask |= RADEON_RESET_DMA1;
3813 
3814     /* SRBM_STATUS */
3815     tmp = RREG32(SRBM_STATUS);
3816 
3817     if (tmp & IH_BUSY)
3818         reset_mask |= RADEON_RESET_IH;
3819 
3820     if (tmp & SEM_BUSY)
3821         reset_mask |= RADEON_RESET_SEM;
3822 
3823     if (tmp & GRBM_RQ_PENDING)
3824         reset_mask |= RADEON_RESET_GRBM;
3825 
3826     if (tmp & VMC_BUSY)
3827         reset_mask |= RADEON_RESET_VMC;
3828 
3829     if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3830            MCC_BUSY | MCD_BUSY))
3831         reset_mask |= RADEON_RESET_MC;
3832 
3833     if (evergreen_is_display_hung(rdev))
3834         reset_mask |= RADEON_RESET_DISPLAY;
3835 
3836     /* VM_L2_STATUS */
3837     tmp = RREG32(VM_L2_STATUS);
3838     if (tmp & L2_BUSY)
3839         reset_mask |= RADEON_RESET_VMC;
3840 
3841     /* Skip MC reset as it's mostly likely not hung, just busy */
3842     if (reset_mask & RADEON_RESET_MC) {
3843         DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3844         reset_mask &= ~RADEON_RESET_MC;
3845     }
3846 
3847     return reset_mask;
3848 }
3849 
3850 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3851 {
3852     struct evergreen_mc_save save;
3853     u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3854     u32 tmp;
3855 
3856     if (reset_mask == 0)
3857         return;
3858 
3859     dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3860 
3861     evergreen_print_gpu_status_regs(rdev);
3862     dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3863          RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3864     dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3865          RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3866 
3867     /* disable PG/CG */
3868     si_fini_pg(rdev);
3869     si_fini_cg(rdev);
3870 
3871     /* stop the rlc */
3872     si_rlc_stop(rdev);
3873 
3874     /* Disable CP parsing/prefetching */
3875     WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3876 
3877     if (reset_mask & RADEON_RESET_DMA) {
3878         /* dma0 */
3879         tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3880         tmp &= ~DMA_RB_ENABLE;
3881         WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3882     }
3883     if (reset_mask & RADEON_RESET_DMA1) {
3884         /* dma1 */
3885         tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3886         tmp &= ~DMA_RB_ENABLE;
3887         WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3888     }
3889 
3890     udelay(50);
3891 
3892     evergreen_mc_stop(rdev, &save);
3893     if (evergreen_mc_wait_for_idle(rdev)) {
3894         dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3895     }
3896 
3897     if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3898         grbm_soft_reset = SOFT_RESET_CB |
3899             SOFT_RESET_DB |
3900             SOFT_RESET_GDS |
3901             SOFT_RESET_PA |
3902             SOFT_RESET_SC |
3903             SOFT_RESET_BCI |
3904             SOFT_RESET_SPI |
3905             SOFT_RESET_SX |
3906             SOFT_RESET_TC |
3907             SOFT_RESET_TA |
3908             SOFT_RESET_VGT |
3909             SOFT_RESET_IA;
3910     }
3911 
3912     if (reset_mask & RADEON_RESET_CP) {
3913         grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3914 
3915         srbm_soft_reset |= SOFT_RESET_GRBM;
3916     }
3917 
3918     if (reset_mask & RADEON_RESET_DMA)
3919         srbm_soft_reset |= SOFT_RESET_DMA;
3920 
3921     if (reset_mask & RADEON_RESET_DMA1)
3922         srbm_soft_reset |= SOFT_RESET_DMA1;
3923 
3924     if (reset_mask & RADEON_RESET_DISPLAY)
3925         srbm_soft_reset |= SOFT_RESET_DC;
3926 
3927     if (reset_mask & RADEON_RESET_RLC)
3928         grbm_soft_reset |= SOFT_RESET_RLC;
3929 
3930     if (reset_mask & RADEON_RESET_SEM)
3931         srbm_soft_reset |= SOFT_RESET_SEM;
3932 
3933     if (reset_mask & RADEON_RESET_IH)
3934         srbm_soft_reset |= SOFT_RESET_IH;
3935 
3936     if (reset_mask & RADEON_RESET_GRBM)
3937         srbm_soft_reset |= SOFT_RESET_GRBM;
3938 
3939     if (reset_mask & RADEON_RESET_VMC)
3940         srbm_soft_reset |= SOFT_RESET_VMC;
3941 
3942     if (reset_mask & RADEON_RESET_MC)
3943         srbm_soft_reset |= SOFT_RESET_MC;
3944 
3945     if (grbm_soft_reset) {
3946         tmp = RREG32(GRBM_SOFT_RESET);
3947         tmp |= grbm_soft_reset;
3948         dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3949         WREG32(GRBM_SOFT_RESET, tmp);
3950         tmp = RREG32(GRBM_SOFT_RESET);
3951 
3952         udelay(50);
3953 
3954         tmp &= ~grbm_soft_reset;
3955         WREG32(GRBM_SOFT_RESET, tmp);
3956         tmp = RREG32(GRBM_SOFT_RESET);
3957     }
3958 
3959     if (srbm_soft_reset) {
3960         tmp = RREG32(SRBM_SOFT_RESET);
3961         tmp |= srbm_soft_reset;
3962         dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3963         WREG32(SRBM_SOFT_RESET, tmp);
3964         tmp = RREG32(SRBM_SOFT_RESET);
3965 
3966         udelay(50);
3967 
3968         tmp &= ~srbm_soft_reset;
3969         WREG32(SRBM_SOFT_RESET, tmp);
3970         tmp = RREG32(SRBM_SOFT_RESET);
3971     }
3972 
3973     /* Wait a little for things to settle down */
3974     udelay(50);
3975 
3976     evergreen_mc_resume(rdev, &save);
3977     udelay(50);
3978 
3979     evergreen_print_gpu_status_regs(rdev);
3980 }
3981 
3982 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3983 {
3984     u32 tmp, i;
3985 
3986     tmp = RREG32(CG_SPLL_FUNC_CNTL);
3987     tmp |= SPLL_BYPASS_EN;
3988     WREG32(CG_SPLL_FUNC_CNTL, tmp);
3989 
3990     tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3991     tmp |= SPLL_CTLREQ_CHG;
3992     WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3993 
3994     for (i = 0; i < rdev->usec_timeout; i++) {
3995         if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3996             break;
3997         udelay(1);
3998     }
3999 
4000     tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
4001     tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
4002     WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
4003 
4004     tmp = RREG32(MPLL_CNTL_MODE);
4005     tmp &= ~MPLL_MCLK_SEL;
4006     WREG32(MPLL_CNTL_MODE, tmp);
4007 }
4008 
4009 static void si_spll_powerdown(struct radeon_device *rdev)
4010 {
4011     u32 tmp;
4012 
4013     tmp = RREG32(SPLL_CNTL_MODE);
4014     tmp |= SPLL_SW_DIR_CONTROL;
4015     WREG32(SPLL_CNTL_MODE, tmp);
4016 
4017     tmp = RREG32(CG_SPLL_FUNC_CNTL);
4018     tmp |= SPLL_RESET;
4019     WREG32(CG_SPLL_FUNC_CNTL, tmp);
4020 
4021     tmp = RREG32(CG_SPLL_FUNC_CNTL);
4022     tmp |= SPLL_SLEEP;
4023     WREG32(CG_SPLL_FUNC_CNTL, tmp);
4024 
4025     tmp = RREG32(SPLL_CNTL_MODE);
4026     tmp &= ~SPLL_SW_DIR_CONTROL;
4027     WREG32(SPLL_CNTL_MODE, tmp);
4028 }
4029 
4030 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4031 {
4032     struct evergreen_mc_save save;
4033     u32 tmp, i;
4034 
4035     dev_info(rdev->dev, "GPU pci config reset\n");
4036 
4037     /* disable dpm? */
4038 
4039     /* disable cg/pg */
4040     si_fini_pg(rdev);
4041     si_fini_cg(rdev);
4042 
4043     /* Disable CP parsing/prefetching */
4044     WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4045     /* dma0 */
4046     tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4047     tmp &= ~DMA_RB_ENABLE;
4048     WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4049     /* dma1 */
4050     tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4051     tmp &= ~DMA_RB_ENABLE;
4052     WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4053     /* XXX other engines? */
4054 
4055     /* halt the rlc, disable cp internal ints */
4056     si_rlc_stop(rdev);
4057 
4058     udelay(50);
4059 
4060     /* disable mem access */
4061     evergreen_mc_stop(rdev, &save);
4062     if (evergreen_mc_wait_for_idle(rdev)) {
4063         dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
4064     }
4065 
4066     /* set mclk/sclk to bypass */
4067     si_set_clk_bypass_mode(rdev);
4068     /* powerdown spll */
4069     si_spll_powerdown(rdev);
4070     /* disable BM */
4071     pci_clear_master(rdev->pdev);
4072     /* reset */
4073     radeon_pci_config_reset(rdev);
4074     /* wait for asic to come out of reset */
4075     for (i = 0; i < rdev->usec_timeout; i++) {
4076         if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4077             break;
4078         udelay(1);
4079     }
4080 }
4081 
4082 int si_asic_reset(struct radeon_device *rdev, bool hard)
4083 {
4084     u32 reset_mask;
4085 
4086     if (hard) {
4087         si_gpu_pci_config_reset(rdev);
4088         return 0;
4089     }
4090 
4091     reset_mask = si_gpu_check_soft_reset(rdev);
4092 
4093     if (reset_mask)
4094         r600_set_bios_scratch_engine_hung(rdev, true);
4095 
4096     /* try soft reset */
4097     si_gpu_soft_reset(rdev, reset_mask);
4098 
4099     reset_mask = si_gpu_check_soft_reset(rdev);
4100 
4101     /* try pci config reset */
4102     if (reset_mask && radeon_hard_reset)
4103         si_gpu_pci_config_reset(rdev);
4104 
4105     reset_mask = si_gpu_check_soft_reset(rdev);
4106 
4107     if (!reset_mask)
4108         r600_set_bios_scratch_engine_hung(rdev, false);
4109 
4110     return 0;
4111 }
4112 
4113 /**
4114  * si_gfx_is_lockup - Check if the GFX engine is locked up
4115  *
4116  * @rdev: radeon_device pointer
4117  * @ring: radeon_ring structure holding ring information
4118  *
4119  * Check if the GFX engine is locked up.
4120  * Returns true if the engine appears to be locked up, false if not.
4121  */
4122 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4123 {
4124     u32 reset_mask = si_gpu_check_soft_reset(rdev);
4125 
4126     if (!(reset_mask & (RADEON_RESET_GFX |
4127                 RADEON_RESET_COMPUTE |
4128                 RADEON_RESET_CP))) {
4129         radeon_ring_lockup_update(rdev, ring);
4130         return false;
4131     }
4132     return radeon_ring_test_lockup(rdev, ring);
4133 }
4134 
4135 /* MC */
4136 static void si_mc_program(struct radeon_device *rdev)
4137 {
4138     struct evergreen_mc_save save;
4139     u32 tmp;
4140     int i, j;
4141 
4142     /* Initialize HDP */
4143     for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4144         WREG32((0x2c14 + j), 0x00000000);
4145         WREG32((0x2c18 + j), 0x00000000);
4146         WREG32((0x2c1c + j), 0x00000000);
4147         WREG32((0x2c20 + j), 0x00000000);
4148         WREG32((0x2c24 + j), 0x00000000);
4149     }
4150     WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4151 
4152     evergreen_mc_stop(rdev, &save);
4153     if (radeon_mc_wait_for_idle(rdev)) {
4154         dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4155     }
4156     if (!ASIC_IS_NODCE(rdev))
4157         /* Lockout access through VGA aperture*/
4158         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4159     /* Update configuration */
4160     WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4161            rdev->mc.vram_start >> 12);
4162     WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4163            rdev->mc.vram_end >> 12);
4164     WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4165            rdev->vram_scratch.gpu_addr >> 12);
4166     tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4167     tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4168     WREG32(MC_VM_FB_LOCATION, tmp);
4169     /* XXX double check these! */
4170     WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4171     WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4172     WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4173     WREG32(MC_VM_AGP_BASE, 0);
4174     WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4175     WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4176     if (radeon_mc_wait_for_idle(rdev)) {
4177         dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4178     }
4179     evergreen_mc_resume(rdev, &save);
4180     if (!ASIC_IS_NODCE(rdev)) {
4181         /* we need to own VRAM, so turn off the VGA renderer here
4182          * to stop it overwriting our objects */
4183         rv515_vga_render_disable(rdev);
4184     }
4185 }
4186 
4187 void si_vram_gtt_location(struct radeon_device *rdev,
4188               struct radeon_mc *mc)
4189 {
4190     if (mc->mc_vram_size > 0xFFC0000000ULL) {
4191         /* leave room for at least 1024M GTT */
4192         dev_warn(rdev->dev, "limiting VRAM\n");
4193         mc->real_vram_size = 0xFFC0000000ULL;
4194         mc->mc_vram_size = 0xFFC0000000ULL;
4195     }
4196     radeon_vram_location(rdev, &rdev->mc, 0);
4197     rdev->mc.gtt_base_align = 0;
4198     radeon_gtt_location(rdev, mc);
4199 }
4200 
4201 static int si_mc_init(struct radeon_device *rdev)
4202 {
4203     u32 tmp;
4204     int chansize, numchan;
4205 
4206     /* Get VRAM informations */
4207     rdev->mc.vram_is_ddr = true;
4208     tmp = RREG32(MC_ARB_RAMCFG);
4209     if (tmp & CHANSIZE_OVERRIDE) {
4210         chansize = 16;
4211     } else if (tmp & CHANSIZE_MASK) {
4212         chansize = 64;
4213     } else {
4214         chansize = 32;
4215     }
4216     tmp = RREG32(MC_SHARED_CHMAP);
4217     switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4218     case 0:
4219     default:
4220         numchan = 1;
4221         break;
4222     case 1:
4223         numchan = 2;
4224         break;
4225     case 2:
4226         numchan = 4;
4227         break;
4228     case 3:
4229         numchan = 8;
4230         break;
4231     case 4:
4232         numchan = 3;
4233         break;
4234     case 5:
4235         numchan = 6;
4236         break;
4237     case 6:
4238         numchan = 10;
4239         break;
4240     case 7:
4241         numchan = 12;
4242         break;
4243     case 8:
4244         numchan = 16;
4245         break;
4246     }
4247     rdev->mc.vram_width = numchan * chansize;
4248     /* Could aper size report 0 ? */
4249     rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4250     rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4251     /* size in MB on si */
4252     tmp = RREG32(CONFIG_MEMSIZE);
4253     /* some boards may have garbage in the upper 16 bits */
4254     if (tmp & 0xffff0000) {
4255         DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4256         if (tmp & 0xffff)
4257             tmp &= 0xffff;
4258     }
4259     rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4260     rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4261     rdev->mc.visible_vram_size = rdev->mc.aper_size;
4262     si_vram_gtt_location(rdev, &rdev->mc);
4263     radeon_update_bandwidth_info(rdev);
4264 
4265     return 0;
4266 }
4267 
4268 /*
4269  * GART
4270  */
4271 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4272 {
4273     /* flush hdp cache */
4274     WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4275 
4276     /* bits 0-15 are the VM contexts0-15 */
4277     WREG32(VM_INVALIDATE_REQUEST, 1);
4278 }
4279 
4280 static int si_pcie_gart_enable(struct radeon_device *rdev)
4281 {
4282     int r, i;
4283 
4284     if (rdev->gart.robj == NULL) {
4285         dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4286         return -EINVAL;
4287     }
4288     r = radeon_gart_table_vram_pin(rdev);
4289     if (r)
4290         return r;
4291     /* Setup TLB control */
4292     WREG32(MC_VM_MX_L1_TLB_CNTL,
4293            (0xA << 7) |
4294            ENABLE_L1_TLB |
4295            ENABLE_L1_FRAGMENT_PROCESSING |
4296            SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4297            ENABLE_ADVANCED_DRIVER_MODEL |
4298            SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4299     /* Setup L2 cache */
4300     WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4301            ENABLE_L2_FRAGMENT_PROCESSING |
4302            ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4303            ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4304            EFFECTIVE_L2_QUEUE_SIZE(7) |
4305            CONTEXT1_IDENTITY_ACCESS_MODE(1));
4306     WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4307     WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4308            BANK_SELECT(4) |
4309            L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4310     /* setup context0 */
4311     WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4312     WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4313     WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4314     WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4315             (u32)(rdev->dummy_page.addr >> 12));
4316     WREG32(VM_CONTEXT0_CNTL2, 0);
4317     WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4318                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4319 
4320     WREG32(0x15D4, 0);
4321     WREG32(0x15D8, 0);
4322     WREG32(0x15DC, 0);
4323 
4324     /* empty context1-15 */
4325     /* set vm size, must be a multiple of 4 */
4326     WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4327     WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4328     /* Assign the pt base to something valid for now; the pts used for
4329      * the VMs are determined by the application and setup and assigned
4330      * on the fly in the vm part of radeon_gart.c
4331      */
4332     for (i = 1; i < 16; i++) {
4333         if (i < 8)
4334             WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4335                    rdev->vm_manager.saved_table_addr[i]);
4336         else
4337             WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4338                    rdev->vm_manager.saved_table_addr[i]);
4339     }
4340 
4341     /* enable context1-15 */
4342     WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4343            (u32)(rdev->dummy_page.addr >> 12));
4344     WREG32(VM_CONTEXT1_CNTL2, 4);
4345     WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4346                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4347                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4348                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4349                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4350                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4351                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4352                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4353                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4354                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4355                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4356                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4357                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4358                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4359 
4360     si_pcie_gart_tlb_flush(rdev);
4361     DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4362          (unsigned)(rdev->mc.gtt_size >> 20),
4363          (unsigned long long)rdev->gart.table_addr);
4364     rdev->gart.ready = true;
4365     return 0;
4366 }
4367 
4368 static void si_pcie_gart_disable(struct radeon_device *rdev)
4369 {
4370     unsigned i;
4371 
4372     for (i = 1; i < 16; ++i) {
4373         uint32_t reg;
4374         if (i < 8)
4375             reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4376         else
4377             reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4378         rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4379     }
4380 
4381     /* Disable all tables */
4382     WREG32(VM_CONTEXT0_CNTL, 0);
4383     WREG32(VM_CONTEXT1_CNTL, 0);
4384     /* Setup TLB control */
4385     WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4386            SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4387     /* Setup L2 cache */
4388     WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4389            ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4390            EFFECTIVE_L2_QUEUE_SIZE(7) |
4391            CONTEXT1_IDENTITY_ACCESS_MODE(1));
4392     WREG32(VM_L2_CNTL2, 0);
4393     WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4394            L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4395     radeon_gart_table_vram_unpin(rdev);
4396 }
4397 
/**
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART, frees its VRAM table and then runs the common GART
 * teardown.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	/* the hardware must stop using the table before it is freed */
	si_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4404 
4405 /* vm parser */
4406 static bool si_vm_reg_valid(u32 reg)
4407 {
4408     /* context regs are fine */
4409     if (reg >= 0x28000)
4410         return true;
4411 
4412     /* shader regs are also fine */
4413     if (reg >= 0xB000 && reg < 0xC000)
4414         return true;
4415 
4416     /* check config regs */
4417     switch (reg) {
4418     case GRBM_GFX_INDEX:
4419     case CP_STRMOUT_CNTL:
4420     case VGT_VTX_VECT_EJECT_REG:
4421     case VGT_CACHE_INVALIDATION:
4422     case VGT_ESGS_RING_SIZE:
4423     case VGT_GSVS_RING_SIZE:
4424     case VGT_GS_VERTEX_REUSE:
4425     case VGT_PRIMITIVE_TYPE:
4426     case VGT_INDEX_TYPE:
4427     case VGT_NUM_INDICES:
4428     case VGT_NUM_INSTANCES:
4429     case VGT_TF_RING_SIZE:
4430     case VGT_HS_OFFCHIP_PARAM:
4431     case VGT_TF_MEMORY_BASE:
4432     case PA_CL_ENHANCE:
4433     case PA_SU_LINE_STIPPLE_VALUE:
4434     case PA_SC_LINE_STIPPLE_STATE:
4435     case PA_SC_ENHANCE:
4436     case SQC_CACHES:
4437     case SPI_STATIC_THREAD_MGMT_1:
4438     case SPI_STATIC_THREAD_MGMT_2:
4439     case SPI_STATIC_THREAD_MGMT_3:
4440     case SPI_PS_MAX_WAVE_ID:
4441     case SPI_CONFIG_CNTL:
4442     case SPI_CONFIG_CNTL_1:
4443     case TA_CNTL_AUX:
4444     case TA_CS_BC_BASE_ADDR:
4445         return true;
4446     default:
4447         DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4448         return false;
4449     }
4450 }
4451 
4452 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4453                   u32 *ib, struct radeon_cs_packet *pkt)
4454 {
4455     switch (pkt->opcode) {
4456     case PACKET3_NOP:
4457     case PACKET3_SET_BASE:
4458     case PACKET3_SET_CE_DE_COUNTERS:
4459     case PACKET3_LOAD_CONST_RAM:
4460     case PACKET3_WRITE_CONST_RAM:
4461     case PACKET3_WRITE_CONST_RAM_OFFSET:
4462     case PACKET3_DUMP_CONST_RAM:
4463     case PACKET3_INCREMENT_CE_COUNTER:
4464     case PACKET3_WAIT_ON_DE_COUNTER:
4465     case PACKET3_CE_WRITE:
4466         break;
4467     default:
4468         DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4469         return -EINVAL;
4470     }
4471     return 0;
4472 }
4473 
4474 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4475 {
4476     u32 start_reg, reg, i;
4477     u32 command = ib[idx + 4];
4478     u32 info = ib[idx + 1];
4479     u32 idx_value = ib[idx];
4480     if (command & PACKET3_CP_DMA_CMD_SAS) {
4481         /* src address space is register */
4482         if (((info & 0x60000000) >> 29) == 0) {
4483             start_reg = idx_value << 2;
4484             if (command & PACKET3_CP_DMA_CMD_SAIC) {
4485                 reg = start_reg;
4486                 if (!si_vm_reg_valid(reg)) {
4487                     DRM_ERROR("CP DMA Bad SRC register\n");
4488                     return -EINVAL;
4489                 }
4490             } else {
4491                 for (i = 0; i < (command & 0x1fffff); i++) {
4492                     reg = start_reg + (4 * i);
4493                     if (!si_vm_reg_valid(reg)) {
4494                         DRM_ERROR("CP DMA Bad SRC register\n");
4495                         return -EINVAL;
4496                     }
4497                 }
4498             }
4499         }
4500     }
4501     if (command & PACKET3_CP_DMA_CMD_DAS) {
4502         /* dst address space is register */
4503         if (((info & 0x00300000) >> 20) == 0) {
4504             start_reg = ib[idx + 2];
4505             if (command & PACKET3_CP_DMA_CMD_DAIC) {
4506                 reg = start_reg;
4507                 if (!si_vm_reg_valid(reg)) {
4508                     DRM_ERROR("CP DMA Bad DST register\n");
4509                     return -EINVAL;
4510                 }
4511             } else {
4512                 for (i = 0; i < (command & 0x1fffff); i++) {
4513                     reg = start_reg + (4 * i);
4514                     if (!si_vm_reg_valid(reg)) {
4515                         DRM_ERROR("CP DMA Bad DST register\n");
4516                         return -EINVAL;
4517                     }
4518                 }
4519             }
4520         }
4521     }
4522     return 0;
4523 }
4524 
4525 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4526                    u32 *ib, struct radeon_cs_packet *pkt)
4527 {
4528     int r;
4529     u32 idx = pkt->idx + 1;
4530     u32 idx_value = ib[idx];
4531     u32 start_reg, end_reg, reg, i;
4532 
4533     switch (pkt->opcode) {
4534     case PACKET3_NOP:
4535     case PACKET3_SET_BASE:
4536     case PACKET3_CLEAR_STATE:
4537     case PACKET3_INDEX_BUFFER_SIZE:
4538     case PACKET3_DISPATCH_DIRECT:
4539     case PACKET3_DISPATCH_INDIRECT:
4540     case PACKET3_ALLOC_GDS:
4541     case PACKET3_WRITE_GDS_RAM:
4542     case PACKET3_ATOMIC_GDS:
4543     case PACKET3_ATOMIC:
4544     case PACKET3_OCCLUSION_QUERY:
4545     case PACKET3_SET_PREDICATION:
4546     case PACKET3_COND_EXEC:
4547     case PACKET3_PRED_EXEC:
4548     case PACKET3_DRAW_INDIRECT:
4549     case PACKET3_DRAW_INDEX_INDIRECT:
4550     case PACKET3_INDEX_BASE:
4551     case PACKET3_DRAW_INDEX_2:
4552     case PACKET3_CONTEXT_CONTROL:
4553     case PACKET3_INDEX_TYPE:
4554     case PACKET3_DRAW_INDIRECT_MULTI:
4555     case PACKET3_DRAW_INDEX_AUTO:
4556     case PACKET3_DRAW_INDEX_IMMD:
4557     case PACKET3_NUM_INSTANCES:
4558     case PACKET3_DRAW_INDEX_MULTI_AUTO:
4559     case PACKET3_STRMOUT_BUFFER_UPDATE:
4560     case PACKET3_DRAW_INDEX_OFFSET_2:
4561     case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4562     case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4563     case PACKET3_MPEG_INDEX:
4564     case PACKET3_WAIT_REG_MEM:
4565     case PACKET3_MEM_WRITE:
4566     case PACKET3_PFP_SYNC_ME:
4567     case PACKET3_SURFACE_SYNC:
4568     case PACKET3_EVENT_WRITE:
4569     case PACKET3_EVENT_WRITE_EOP:
4570     case PACKET3_EVENT_WRITE_EOS:
4571     case PACKET3_SET_CONTEXT_REG:
4572     case PACKET3_SET_CONTEXT_REG_INDIRECT:
4573     case PACKET3_SET_SH_REG:
4574     case PACKET3_SET_SH_REG_OFFSET:
4575     case PACKET3_INCREMENT_DE_COUNTER:
4576     case PACKET3_WAIT_ON_CE_COUNTER:
4577     case PACKET3_WAIT_ON_AVAIL_BUFFER:
4578     case PACKET3_ME_WRITE:
4579         break;
4580     case PACKET3_COPY_DATA:
4581         if ((idx_value & 0xf00) == 0) {
4582             reg = ib[idx + 3] * 4;
4583             if (!si_vm_reg_valid(reg))
4584                 return -EINVAL;
4585         }
4586         break;
4587     case PACKET3_WRITE_DATA:
4588         if ((idx_value & 0xf00) == 0) {
4589             start_reg = ib[idx + 1] * 4;
4590             if (idx_value & 0x10000) {
4591                 if (!si_vm_reg_valid(start_reg))
4592                     return -EINVAL;
4593             } else {
4594                 for (i = 0; i < (pkt->count - 2); i++) {
4595                     reg = start_reg + (4 * i);
4596                     if (!si_vm_reg_valid(reg))
4597                         return -EINVAL;
4598                 }
4599             }
4600         }
4601         break;
4602     case PACKET3_COND_WRITE:
4603         if (idx_value & 0x100) {
4604             reg = ib[idx + 5] * 4;
4605             if (!si_vm_reg_valid(reg))
4606                 return -EINVAL;
4607         }
4608         break;
4609     case PACKET3_COPY_DW:
4610         if (idx_value & 0x2) {
4611             reg = ib[idx + 3] * 4;
4612             if (!si_vm_reg_valid(reg))
4613                 return -EINVAL;
4614         }
4615         break;
4616     case PACKET3_SET_CONFIG_REG:
4617         start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4618         end_reg = 4 * pkt->count + start_reg - 4;
4619         if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4620             (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4621             (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4622             DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4623             return -EINVAL;
4624         }
4625         for (i = 0; i < pkt->count; i++) {
4626             reg = start_reg + (4 * i);
4627             if (!si_vm_reg_valid(reg))
4628                 return -EINVAL;
4629         }
4630         break;
4631     case PACKET3_CP_DMA:
4632         r = si_vm_packet3_cp_dma_check(ib, idx);
4633         if (r)
4634             return r;
4635         break;
4636     default:
4637         DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4638         return -EINVAL;
4639     }
4640     return 0;
4641 }
4642 
4643 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4644                        u32 *ib, struct radeon_cs_packet *pkt)
4645 {
4646     int r;
4647     u32 idx = pkt->idx + 1;
4648     u32 idx_value = ib[idx];
4649     u32 start_reg, reg, i;
4650 
4651     switch (pkt->opcode) {
4652     case PACKET3_NOP:
4653     case PACKET3_SET_BASE:
4654     case PACKET3_CLEAR_STATE:
4655     case PACKET3_DISPATCH_DIRECT:
4656     case PACKET3_DISPATCH_INDIRECT:
4657     case PACKET3_ALLOC_GDS:
4658     case PACKET3_WRITE_GDS_RAM:
4659     case PACKET3_ATOMIC_GDS:
4660     case PACKET3_ATOMIC:
4661     case PACKET3_OCCLUSION_QUERY:
4662     case PACKET3_SET_PREDICATION:
4663     case PACKET3_COND_EXEC:
4664     case PACKET3_PRED_EXEC:
4665     case PACKET3_CONTEXT_CONTROL:
4666     case PACKET3_STRMOUT_BUFFER_UPDATE:
4667     case PACKET3_WAIT_REG_MEM:
4668     case PACKET3_MEM_WRITE:
4669     case PACKET3_PFP_SYNC_ME:
4670     case PACKET3_SURFACE_SYNC:
4671     case PACKET3_EVENT_WRITE:
4672     case PACKET3_EVENT_WRITE_EOP:
4673     case PACKET3_EVENT_WRITE_EOS:
4674     case PACKET3_SET_CONTEXT_REG:
4675     case PACKET3_SET_CONTEXT_REG_INDIRECT:
4676     case PACKET3_SET_SH_REG:
4677     case PACKET3_SET_SH_REG_OFFSET:
4678     case PACKET3_INCREMENT_DE_COUNTER:
4679     case PACKET3_WAIT_ON_CE_COUNTER:
4680     case PACKET3_WAIT_ON_AVAIL_BUFFER:
4681     case PACKET3_ME_WRITE:
4682         break;
4683     case PACKET3_COPY_DATA:
4684         if ((idx_value & 0xf00) == 0) {
4685             reg = ib[idx + 3] * 4;
4686             if (!si_vm_reg_valid(reg))
4687                 return -EINVAL;
4688         }
4689         break;
4690     case PACKET3_WRITE_DATA:
4691         if ((idx_value & 0xf00) == 0) {
4692             start_reg = ib[idx + 1] * 4;
4693             if (idx_value & 0x10000) {
4694                 if (!si_vm_reg_valid(start_reg))
4695                     return -EINVAL;
4696             } else {
4697                 for (i = 0; i < (pkt->count - 2); i++) {
4698                     reg = start_reg + (4 * i);
4699                     if (!si_vm_reg_valid(reg))
4700                         return -EINVAL;
4701                 }
4702             }
4703         }
4704         break;
4705     case PACKET3_COND_WRITE:
4706         if (idx_value & 0x100) {
4707             reg = ib[idx + 5] * 4;
4708             if (!si_vm_reg_valid(reg))
4709                 return -EINVAL;
4710         }
4711         break;
4712     case PACKET3_COPY_DW:
4713         if (idx_value & 0x2) {
4714             reg = ib[idx + 3] * 4;
4715             if (!si_vm_reg_valid(reg))
4716                 return -EINVAL;
4717         }
4718         break;
4719     case PACKET3_CP_DMA:
4720         r = si_vm_packet3_cp_dma_check(ib, idx);
4721         if (r)
4722             return r;
4723         break;
4724     default:
4725         DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4726         return -EINVAL;
4727     }
4728     return 0;
4729 }
4730 
4731 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4732 {
4733     int ret = 0;
4734     u32 idx = 0, i;
4735     struct radeon_cs_packet pkt;
4736 
4737     do {
4738         pkt.idx = idx;
4739         pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4740         pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4741         pkt.one_reg_wr = 0;
4742         switch (pkt.type) {
4743         case RADEON_PACKET_TYPE0:
4744             dev_err(rdev->dev, "Packet0 not allowed!\n");
4745             ret = -EINVAL;
4746             break;
4747         case RADEON_PACKET_TYPE2:
4748             idx += 1;
4749             break;
4750         case RADEON_PACKET_TYPE3:
4751             pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4752             if (ib->is_const_ib)
4753                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4754             else {
4755                 switch (ib->ring) {
4756                 case RADEON_RING_TYPE_GFX_INDEX:
4757                     ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4758                     break;
4759                 case CAYMAN_RING_TYPE_CP1_INDEX:
4760                 case CAYMAN_RING_TYPE_CP2_INDEX:
4761                     ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4762                     break;
4763                 default:
4764                     dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4765                     ret = -EINVAL;
4766                     break;
4767                 }
4768             }
4769             idx += pkt.count + 2;
4770             break;
4771         default:
4772             dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4773             ret = -EINVAL;
4774             break;
4775         }
4776         if (ret) {
4777             for (i = 0; i < ib->length_dw; i++) {
4778                 if (i == idx)
4779                     printk("\t0x%08x <---\n", ib->ptr[i]);
4780                 else
4781                     printk("\t0x%08x\n", ib->ptr[i]);
4782             }
4783             break;
4784         }
4785     } while (idx < ib->length_dw);
4786 
4787     return ret;
4788 }
4789 
4790 /*
4791  * vm
4792  */
4793 int si_vm_init(struct radeon_device *rdev)
4794 {
4795     /* number of VMs */
4796     rdev->vm_manager.nvm = 16;
4797     /* base offset of vram pages */
4798     rdev->vm_manager.vram_base_offset = 0;
4799 
4800     return 0;
4801 }
4802 
/**
 * si_vm_fini - VM manager teardown
 *
 * @rdev: radeon_device pointer
 *
 * Nothing to do here.
 */
void si_vm_fini(struct radeon_device *rdev)
{
	/* intentionally empty */
}
4806 
4807 /**
4808  * si_vm_decode_fault - print human readable fault info
4809  *
4810  * @rdev: radeon_device pointer
4811  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4812  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4813  *
4814  * Print human readable fault information (SI).
4815  */
4816 static void si_vm_decode_fault(struct radeon_device *rdev,
4817                    u32 status, u32 addr)
4818 {
4819     u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4820     u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4821     u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4822     char *block;
4823 
4824     if (rdev->family == CHIP_TAHITI) {
4825         switch (mc_id) {
4826         case 160:
4827         case 144:
4828         case 96:
4829         case 80:
4830         case 224:
4831         case 208:
4832         case 32:
4833         case 16:
4834             block = "CB";
4835             break;
4836         case 161:
4837         case 145:
4838         case 97:
4839         case 81:
4840         case 225:
4841         case 209:
4842         case 33:
4843         case 17:
4844             block = "CB_FMASK";
4845             break;
4846         case 162:
4847         case 146:
4848         case 98:
4849         case 82:
4850         case 226:
4851         case 210:
4852         case 34:
4853         case 18:
4854             block = "CB_CMASK";
4855             break;
4856         case 163:
4857         case 147:
4858         case 99:
4859         case 83:
4860         case 227:
4861         case 211:
4862         case 35:
4863         case 19:
4864             block = "CB_IMMED";
4865             break;
4866         case 164:
4867         case 148:
4868         case 100:
4869         case 84:
4870         case 228:
4871         case 212:
4872         case 36:
4873         case 20:
4874             block = "DB";
4875             break;
4876         case 165:
4877         case 149:
4878         case 101:
4879         case 85:
4880         case 229:
4881         case 213:
4882         case 37:
4883         case 21:
4884             block = "DB_HTILE";
4885             break;
4886         case 167:
4887         case 151:
4888         case 103:
4889         case 87:
4890         case 231:
4891         case 215:
4892         case 39:
4893         case 23:
4894             block = "DB_STEN";
4895             break;
4896         case 72:
4897         case 68:
4898         case 64:
4899         case 8:
4900         case 4:
4901         case 0:
4902         case 136:
4903         case 132:
4904         case 128:
4905         case 200:
4906         case 196:
4907         case 192:
4908             block = "TC";
4909             break;
4910         case 112:
4911         case 48:
4912             block = "CP";
4913             break;
4914         case 49:
4915         case 177:
4916         case 50:
4917         case 178:
4918             block = "SH";
4919             break;
4920         case 53:
4921         case 190:
4922             block = "VGT";
4923             break;
4924         case 117:
4925             block = "IH";
4926             break;
4927         case 51:
4928         case 115:
4929             block = "RLC";
4930             break;
4931         case 119:
4932         case 183:
4933             block = "DMA0";
4934             break;
4935         case 61:
4936             block = "DMA1";
4937             break;
4938         case 248:
4939         case 120:
4940             block = "HDP";
4941             break;
4942         default:
4943             block = "unknown";
4944             break;
4945         }
4946     } else {
4947         switch (mc_id) {
4948         case 32:
4949         case 16:
4950         case 96:
4951         case 80:
4952         case 160:
4953         case 144:
4954         case 224:
4955         case 208:
4956             block = "CB";
4957             break;
4958         case 33:
4959         case 17:
4960         case 97:
4961         case 81:
4962         case 161:
4963         case 145:
4964         case 225:
4965         case 209:
4966             block = "CB_FMASK";
4967             break;
4968         case 34:
4969         case 18:
4970         case 98:
4971         case 82:
4972         case 162:
4973         case 146:
4974         case 226:
4975         case 210:
4976             block = "CB_CMASK";
4977             break;
4978         case 35:
4979         case 19:
4980         case 99:
4981         case 83:
4982         case 163:
4983         case 147:
4984         case 227:
4985         case 211:
4986             block = "CB_IMMED";
4987             break;
4988         case 36:
4989         case 20:
4990         case 100:
4991         case 84:
4992         case 164:
4993         case 148:
4994         case 228:
4995         case 212:
4996             block = "DB";
4997             break;
4998         case 37:
4999         case 21:
5000         case 101:
5001         case 85:
5002         case 165:
5003         case 149:
5004         case 229:
5005         case 213:
5006             block = "DB_HTILE";
5007             break;
5008         case 39:
5009         case 23:
5010         case 103:
5011         case 87:
5012         case 167:
5013         case 151:
5014         case 231:
5015         case 215:
5016             block = "DB_STEN";
5017             break;
5018         case 72:
5019         case 68:
5020         case 8:
5021         case 4:
5022         case 136:
5023         case 132:
5024         case 200:
5025         case 196:
5026             block = "TC";
5027             break;
5028         case 112:
5029         case 48:
5030             block = "CP";
5031             break;
5032         case 49:
5033         case 177:
5034         case 50:
5035         case 178:
5036             block = "SH";
5037             break;
5038         case 53:
5039             block = "VGT";
5040             break;
5041         case 117:
5042             block = "IH";
5043             break;
5044         case 51:
5045         case 115:
5046             block = "RLC";
5047             break;
5048         case 119:
5049         case 183:
5050             block = "DMA0";
5051             break;
5052         case 61:
5053             block = "DMA1";
5054             break;
5055         case 248:
5056         case 120:
5057             block = "HDP";
5058             break;
5059         default:
5060             block = "unknown";
5061             break;
5062         }
5063     }
5064 
5065     printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5066            protections, vmid, addr,
5067            (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5068            block, mc_id);
5069 }
5070 
5071 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5072          unsigned vm_id, uint64_t pd_addr)
5073 {
5074     /* write new base address */
5075     radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5076     radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5077                  WRITE_DATA_DST_SEL(0)));
5078 
5079     if (vm_id < 8) {
5080         radeon_ring_write(ring,
5081                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5082     } else {
5083         radeon_ring_write(ring,
5084                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5085     }
5086     radeon_ring_write(ring, 0);
5087     radeon_ring_write(ring, pd_addr >> 12);
5088 
5089     /* flush hdp cache */
5090     radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5091     radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5092                  WRITE_DATA_DST_SEL(0)));
5093     radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5094     radeon_ring_write(ring, 0);
5095     radeon_ring_write(ring, 0x1);
5096 
5097     /* bits 0-15 are the VM contexts0-15 */
5098     radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5099     radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5100                  WRITE_DATA_DST_SEL(0)));
5101     radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5102     radeon_ring_write(ring, 0);
5103     radeon_ring_write(ring, 1 << vm_id);
5104 
5105     /* wait for the invalidate to complete */
5106     radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5107     radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5108                  WAIT_REG_MEM_ENGINE(0))); /* me */
5109     radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5110     radeon_ring_write(ring, 0);
5111     radeon_ring_write(ring, 0); /* ref */
5112     radeon_ring_write(ring, 0); /* mask */
5113     radeon_ring_write(ring, 0x20); /* poll interval */
5114 
5115     /* sync PFP to ME, otherwise we might get invalid PFP reads */
5116     radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5117     radeon_ring_write(ring, 0x0);
5118 }
5119 
5120 /*
5121  *  Power and clock gating
5122  */
5123 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5124 {
5125     int i;
5126 
5127     for (i = 0; i < rdev->usec_timeout; i++) {
5128         if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5129             break;
5130         udelay(1);
5131     }
5132 
5133     for (i = 0; i < rdev->usec_timeout; i++) {
5134         if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5135             break;
5136         udelay(1);
5137     }
5138 }
5139 
5140 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5141                      bool enable)
5142 {
5143     u32 tmp = RREG32(CP_INT_CNTL_RING0);
5144     u32 mask;
5145     int i;
5146 
5147     if (enable)
5148         tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5149     else
5150         tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5151     WREG32(CP_INT_CNTL_RING0, tmp);
5152 
5153     if (!enable) {
5154         /* read a gfx register */
5155         tmp = RREG32(DB_DEPTH_INFO);
5156 
5157         mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5158         for (i = 0; i < rdev->usec_timeout; i++) {
5159             if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5160                 break;
5161             udelay(1);
5162         }
5163     }
5164 }
5165 
5166 static void si_set_uvd_dcm(struct radeon_device *rdev,
5167                bool sw_mode)
5168 {
5169     u32 tmp, tmp2;
5170 
5171     tmp = RREG32(UVD_CGC_CTRL);
5172     tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5173     tmp |= DCM | CG_DT(1) | CLK_OD(4);
5174 
5175     if (sw_mode) {
5176         tmp &= ~0x7ffff800;
5177         tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5178     } else {
5179         tmp |= 0x7ffff800;
5180         tmp2 = 0;
5181     }
5182 
5183     WREG32(UVD_CGC_CTRL, tmp);
5184     WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5185 }
5186 
5187 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5188 {
5189     bool hw_mode = true;
5190 
5191     if (hw_mode) {
5192         si_set_uvd_dcm(rdev, false);
5193     } else {
5194         u32 tmp = RREG32(UVD_CGC_CTRL);
5195         tmp &= ~DCM;
5196         WREG32(UVD_CGC_CTRL, tmp);
5197     }
5198 }
5199 
5200 static u32 si_halt_rlc(struct radeon_device *rdev)
5201 {
5202     u32 data, orig;
5203 
5204     orig = data = RREG32(RLC_CNTL);
5205 
5206     if (data & RLC_ENABLE) {
5207         data &= ~RLC_ENABLE;
5208         WREG32(RLC_CNTL, data);
5209 
5210         si_wait_for_rlc_serdes(rdev);
5211     }
5212 
5213     return orig;
5214 }
5215 
5216 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5217 {
5218     u32 tmp;
5219 
5220     tmp = RREG32(RLC_CNTL);
5221     if (tmp != rlc)
5222         WREG32(RLC_CNTL, rlc);
5223 }
5224 
5225 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5226 {
5227     u32 data, orig;
5228 
5229     orig = data = RREG32(DMA_PG);
5230     if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5231         data |= PG_CNTL_ENABLE;
5232     else
5233         data &= ~PG_CNTL_ENABLE;
5234     if (orig != data)
5235         WREG32(DMA_PG, data);
5236 }
5237 
5238 static void si_init_dma_pg(struct radeon_device *rdev)
5239 {
5240     u32 tmp;
5241 
5242     WREG32(DMA_PGFSM_WRITE,  0x00002000);
5243     WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5244 
5245     for (tmp = 0; tmp < 5; tmp++)
5246         WREG32(DMA_PGFSM_WRITE, 0);
5247 }
5248 
5249 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5250                    bool enable)
5251 {
5252     u32 tmp;
5253 
5254     if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5255         tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5256         WREG32(RLC_TTOP_D, tmp);
5257 
5258         tmp = RREG32(RLC_PG_CNTL);
5259         tmp |= GFX_PG_ENABLE;
5260         WREG32(RLC_PG_CNTL, tmp);
5261 
5262         tmp = RREG32(RLC_AUTO_PG_CTRL);
5263         tmp |= AUTO_PG_EN;
5264         WREG32(RLC_AUTO_PG_CTRL, tmp);
5265     } else {
5266         tmp = RREG32(RLC_AUTO_PG_CTRL);
5267         tmp &= ~AUTO_PG_EN;
5268         WREG32(RLC_AUTO_PG_CTRL, tmp);
5269 
5270         tmp = RREG32(DB_RENDER_CONTROL);
5271     }
5272 }
5273 
5274 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5275 {
5276     u32 tmp;
5277 
5278     WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5279 
5280     tmp = RREG32(RLC_PG_CNTL);
5281     tmp |= GFX_PG_SRC;
5282     WREG32(RLC_PG_CNTL, tmp);
5283 
5284     WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5285 
5286     tmp = RREG32(RLC_AUTO_PG_CTRL);
5287 
5288     tmp &= ~GRBM_REG_SGIT_MASK;
5289     tmp |= GRBM_REG_SGIT(0x700);
5290     tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5291     WREG32(RLC_AUTO_PG_CTRL, tmp);
5292 }
5293 
5294 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5295 {
5296     u32 mask = 0, tmp, tmp1;
5297     int i;
5298 
5299     si_select_se_sh(rdev, se, sh);
5300     tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5301     tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5302     si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5303 
5304     tmp &= 0xffff0000;
5305 
5306     tmp |= tmp1;
5307     tmp >>= 16;
5308 
5309     for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5310         mask <<= 1;
5311         mask |= 1;
5312     }
5313 
5314     return (~tmp) & mask;
5315 }
5316 
5317 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5318 {
5319     u32 i, j, k, active_cu_number = 0;
5320     u32 mask, counter, cu_bitmap;
5321     u32 tmp = 0;
5322 
5323     for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5324         for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5325             mask = 1;
5326             cu_bitmap = 0;
5327             counter  = 0;
5328             for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5329                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5330                     if (counter < 2)
5331                         cu_bitmap |= mask;
5332                     counter++;
5333                 }
5334                 mask <<= 1;
5335             }
5336 
5337             active_cu_number += counter;
5338             tmp |= (cu_bitmap << (i * 16 + j * 8));
5339         }
5340     }
5341 
5342     WREG32(RLC_PG_AO_CU_MASK, tmp);
5343 
5344     tmp = RREG32(RLC_MAX_PG_CU);
5345     tmp &= ~MAX_PU_CU_MASK;
5346     tmp |= MAX_PU_CU(active_cu_number);
5347     WREG32(RLC_MAX_PG_CU, tmp);
5348 }
5349 
5350 static void si_enable_cgcg(struct radeon_device *rdev,
5351                bool enable)
5352 {
5353     u32 data, orig, tmp;
5354 
5355     orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5356 
5357     if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5358         si_enable_gui_idle_interrupt(rdev, true);
5359 
5360         WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5361 
5362         tmp = si_halt_rlc(rdev);
5363 
5364         WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5365         WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5366         WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5367 
5368         si_wait_for_rlc_serdes(rdev);
5369 
5370         si_update_rlc(rdev, tmp);
5371 
5372         WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5373 
5374         data |= CGCG_EN | CGLS_EN;
5375     } else {
5376         si_enable_gui_idle_interrupt(rdev, false);
5377 
5378         RREG32(CB_CGTT_SCLK_CTRL);
5379         RREG32(CB_CGTT_SCLK_CTRL);
5380         RREG32(CB_CGTT_SCLK_CTRL);
5381         RREG32(CB_CGTT_SCLK_CTRL);
5382 
5383         data &= ~(CGCG_EN | CGLS_EN);
5384     }
5385 
5386     if (orig != data)
5387         WREG32(RLC_CGCG_CGLS_CTRL, data);
5388 }
5389 
5390 static void si_enable_mgcg(struct radeon_device *rdev,
5391                bool enable)
5392 {
5393     u32 data, orig, tmp = 0;
5394 
5395     if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5396         orig = data = RREG32(CGTS_SM_CTRL_REG);
5397         data = 0x96940200;
5398         if (orig != data)
5399             WREG32(CGTS_SM_CTRL_REG, data);
5400 
5401         if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5402             orig = data = RREG32(CP_MEM_SLP_CNTL);
5403             data |= CP_MEM_LS_EN;
5404             if (orig != data)
5405                 WREG32(CP_MEM_SLP_CNTL, data);
5406         }
5407 
5408         orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5409         data &= 0xffffffc0;
5410         if (orig != data)
5411             WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5412 
5413         tmp = si_halt_rlc(rdev);
5414 
5415         WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5416         WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5417         WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5418 
5419         si_update_rlc(rdev, tmp);
5420     } else {
5421         orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5422         data |= 0x00000003;
5423         if (orig != data)
5424             WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5425 
5426         data = RREG32(CP_MEM_SLP_CNTL);
5427         if (data & CP_MEM_LS_EN) {
5428             data &= ~CP_MEM_LS_EN;
5429             WREG32(CP_MEM_SLP_CNTL, data);
5430         }
5431         orig = data = RREG32(CGTS_SM_CTRL_REG);
5432         data |= LS_OVERRIDE | OVERRIDE;
5433         if (orig != data)
5434             WREG32(CGTS_SM_CTRL_REG, data);
5435 
5436         tmp = si_halt_rlc(rdev);
5437 
5438         WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5439         WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5440         WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5441 
5442         si_update_rlc(rdev, tmp);
5443     }
5444 }
5445 
5446 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5447                    bool enable)
5448 {
5449     u32 orig, data, tmp;
5450 
5451     if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5452         tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5453         tmp |= 0x3fff;
5454         WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5455 
5456         orig = data = RREG32(UVD_CGC_CTRL);
5457         data |= DCM;
5458         if (orig != data)
5459             WREG32(UVD_CGC_CTRL, data);
5460 
5461         WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5462         WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5463     } else {
5464         tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5465         tmp &= ~0x3fff;
5466         WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5467 
5468         orig = data = RREG32(UVD_CGC_CTRL);
5469         data &= ~DCM;
5470         if (orig != data)
5471             WREG32(UVD_CGC_CTRL, data);
5472 
5473         WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5474         WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5475     }
5476 }
5477 
5478 static const u32 mc_cg_registers[] =
5479 {
5480     MC_HUB_MISC_HUB_CG,
5481     MC_HUB_MISC_SIP_CG,
5482     MC_HUB_MISC_VM_CG,
5483     MC_XPB_CLK_GAT,
5484     ATC_MISC_CG,
5485     MC_CITF_MISC_WR_CG,
5486     MC_CITF_MISC_RD_CG,
5487     MC_CITF_MISC_VM_CG,
5488     VM_L2_CG,
5489 };
5490 
5491 static void si_enable_mc_ls(struct radeon_device *rdev,
5492                 bool enable)
5493 {
5494     int i;
5495     u32 orig, data;
5496 
5497     for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5498         orig = data = RREG32(mc_cg_registers[i]);
5499         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5500             data |= MC_LS_ENABLE;
5501         else
5502             data &= ~MC_LS_ENABLE;
5503         if (data != orig)
5504             WREG32(mc_cg_registers[i], data);
5505     }
5506 }
5507 
5508 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5509                    bool enable)
5510 {
5511     int i;
5512     u32 orig, data;
5513 
5514     for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5515         orig = data = RREG32(mc_cg_registers[i]);
5516         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5517             data |= MC_CG_ENABLE;
5518         else
5519             data &= ~MC_CG_ENABLE;
5520         if (data != orig)
5521             WREG32(mc_cg_registers[i], data);
5522     }
5523 }
5524 
5525 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5526                    bool enable)
5527 {
5528     u32 orig, data, offset;
5529     int i;
5530 
5531     if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5532         for (i = 0; i < 2; i++) {
5533             if (i == 0)
5534                 offset = DMA0_REGISTER_OFFSET;
5535             else
5536                 offset = DMA1_REGISTER_OFFSET;
5537             orig = data = RREG32(DMA_POWER_CNTL + offset);
5538             data &= ~MEM_POWER_OVERRIDE;
5539             if (data != orig)
5540                 WREG32(DMA_POWER_CNTL + offset, data);
5541             WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5542         }
5543     } else {
5544         for (i = 0; i < 2; i++) {
5545             if (i == 0)
5546                 offset = DMA0_REGISTER_OFFSET;
5547             else
5548                 offset = DMA1_REGISTER_OFFSET;
5549             orig = data = RREG32(DMA_POWER_CNTL + offset);
5550             data |= MEM_POWER_OVERRIDE;
5551             if (data != orig)
5552                 WREG32(DMA_POWER_CNTL + offset, data);
5553 
5554             orig = data = RREG32(DMA_CLK_CTRL + offset);
5555             data = 0xff000000;
5556             if (data != orig)
5557                 WREG32(DMA_CLK_CTRL + offset, data);
5558         }
5559     }
5560 }
5561 
5562 static void si_enable_bif_mgls(struct radeon_device *rdev,
5563                    bool enable)
5564 {
5565     u32 orig, data;
5566 
5567     orig = data = RREG32_PCIE(PCIE_CNTL2);
5568 
5569     if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5570         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5571             REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5572     else
5573         data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5574               REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5575 
5576     if (orig != data)
5577         WREG32_PCIE(PCIE_CNTL2, data);
5578 }
5579 
5580 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5581                    bool enable)
5582 {
5583     u32 orig, data;
5584 
5585     orig = data = RREG32(HDP_HOST_PATH_CNTL);
5586 
5587     if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5588         data &= ~CLOCK_GATING_DIS;
5589     else
5590         data |= CLOCK_GATING_DIS;
5591 
5592     if (orig != data)
5593         WREG32(HDP_HOST_PATH_CNTL, data);
5594 }
5595 
5596 static void si_enable_hdp_ls(struct radeon_device *rdev,
5597                  bool enable)
5598 {
5599     u32 orig, data;
5600 
5601     orig = data = RREG32(HDP_MEM_POWER_LS);
5602 
5603     if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5604         data |= HDP_LS_ENABLE;
5605     else
5606         data &= ~HDP_LS_ENABLE;
5607 
5608     if (orig != data)
5609         WREG32(HDP_MEM_POWER_LS, data);
5610 }
5611 
5612 static void si_update_cg(struct radeon_device *rdev,
5613              u32 block, bool enable)
5614 {
5615     if (block & RADEON_CG_BLOCK_GFX) {
5616         si_enable_gui_idle_interrupt(rdev, false);
5617         /* order matters! */
5618         if (enable) {
5619             si_enable_mgcg(rdev, true);
5620             si_enable_cgcg(rdev, true);
5621         } else {
5622             si_enable_cgcg(rdev, false);
5623             si_enable_mgcg(rdev, false);
5624         }
5625         si_enable_gui_idle_interrupt(rdev, true);
5626     }
5627 
5628     if (block & RADEON_CG_BLOCK_MC) {
5629         si_enable_mc_mgcg(rdev, enable);
5630         si_enable_mc_ls(rdev, enable);
5631     }
5632 
5633     if (block & RADEON_CG_BLOCK_SDMA) {
5634         si_enable_dma_mgcg(rdev, enable);
5635     }
5636 
5637     if (block & RADEON_CG_BLOCK_BIF) {
5638         si_enable_bif_mgls(rdev, enable);
5639     }
5640 
5641     if (block & RADEON_CG_BLOCK_UVD) {
5642         if (rdev->has_uvd) {
5643             si_enable_uvd_mgcg(rdev, enable);
5644         }
5645     }
5646 
5647     if (block & RADEON_CG_BLOCK_HDP) {
5648         si_enable_hdp_mgcg(rdev, enable);
5649         si_enable_hdp_ls(rdev, enable);
5650     }
5651 }
5652 
5653 static void si_init_cg(struct radeon_device *rdev)
5654 {
5655     si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5656                 RADEON_CG_BLOCK_MC |
5657                 RADEON_CG_BLOCK_SDMA |
5658                 RADEON_CG_BLOCK_BIF |
5659                 RADEON_CG_BLOCK_HDP), true);
5660     if (rdev->has_uvd) {
5661         si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5662         si_init_uvd_internal_cg(rdev);
5663     }
5664 }
5665 
5666 static void si_fini_cg(struct radeon_device *rdev)
5667 {
5668     if (rdev->has_uvd) {
5669         si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5670     }
5671     si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5672                 RADEON_CG_BLOCK_MC |
5673                 RADEON_CG_BLOCK_SDMA |
5674                 RADEON_CG_BLOCK_BIF |
5675                 RADEON_CG_BLOCK_HDP), false);
5676 }
5677 
5678 u32 si_get_csb_size(struct radeon_device *rdev)
5679 {
5680     u32 count = 0;
5681     const struct cs_section_def *sect = NULL;
5682     const struct cs_extent_def *ext = NULL;
5683 
5684     if (rdev->rlc.cs_data == NULL)
5685         return 0;
5686 
5687     /* begin clear state */
5688     count += 2;
5689     /* context control state */
5690     count += 3;
5691 
5692     for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5693         for (ext = sect->section; ext->extent != NULL; ++ext) {
5694             if (sect->id == SECT_CONTEXT)
5695                 count += 2 + ext->reg_count;
5696             else
5697                 return 0;
5698         }
5699     }
5700     /* pa_sc_raster_config */
5701     count += 3;
5702     /* end clear state */
5703     count += 2;
5704     /* clear state */
5705     count += 2;
5706 
5707     return count;
5708 }
5709 
5710 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5711 {
5712     u32 count = 0, i;
5713     const struct cs_section_def *sect = NULL;
5714     const struct cs_extent_def *ext = NULL;
5715 
5716     if (rdev->rlc.cs_data == NULL)
5717         return;
5718     if (buffer == NULL)
5719         return;
5720 
5721     buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5722     buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5723 
5724     buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5725     buffer[count++] = cpu_to_le32(0x80000000);
5726     buffer[count++] = cpu_to_le32(0x80000000);
5727 
5728     for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5729         for (ext = sect->section; ext->extent != NULL; ++ext) {
5730             if (sect->id == SECT_CONTEXT) {
5731                 buffer[count++] =
5732                     cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5733                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5734                 for (i = 0; i < ext->reg_count; i++)
5735                     buffer[count++] = cpu_to_le32(ext->extent[i]);
5736             } else {
5737                 return;
5738             }
5739         }
5740     }
5741 
5742     buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5743     buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5744     switch (rdev->family) {
5745     case CHIP_TAHITI:
5746     case CHIP_PITCAIRN:
5747         buffer[count++] = cpu_to_le32(0x2a00126a);
5748         break;
5749     case CHIP_VERDE:
5750         buffer[count++] = cpu_to_le32(0x0000124a);
5751         break;
5752     case CHIP_OLAND:
5753         buffer[count++] = cpu_to_le32(0x00000082);
5754         break;
5755     case CHIP_HAINAN:
5756         buffer[count++] = cpu_to_le32(0x00000000);
5757         break;
5758     default:
5759         buffer[count++] = cpu_to_le32(0x00000000);
5760         break;
5761     }
5762 
5763     buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5764     buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5765 
5766     buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5767     buffer[count++] = cpu_to_le32(0);
5768 }
5769 
5770 static void si_init_pg(struct radeon_device *rdev)
5771 {
5772     if (rdev->pg_flags) {
5773         if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5774             si_init_dma_pg(rdev);
5775         }
5776         si_init_ao_cu_mask(rdev);
5777         if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5778             si_init_gfx_cgpg(rdev);
5779         } else {
5780             WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5781             WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5782         }
5783         si_enable_dma_pg(rdev, true);
5784         si_enable_gfx_cgpg(rdev, true);
5785     } else {
5786         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5787         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5788     }
5789 }
5790 
5791 static void si_fini_pg(struct radeon_device *rdev)
5792 {
5793     if (rdev->pg_flags) {
5794         si_enable_dma_pg(rdev, false);
5795         si_enable_gfx_cgpg(rdev, false);
5796     }
5797 }
5798 
5799 /*
5800  * RLC
5801  */
5802 void si_rlc_reset(struct radeon_device *rdev)
5803 {
5804     u32 tmp = RREG32(GRBM_SOFT_RESET);
5805 
5806     tmp |= SOFT_RESET_RLC;
5807     WREG32(GRBM_SOFT_RESET, tmp);
5808     udelay(50);
5809     tmp &= ~SOFT_RESET_RLC;
5810     WREG32(GRBM_SOFT_RESET, tmp);
5811     udelay(50);
5812 }
5813 
/**
 * si_rlc_stop - stop the RLC
 *
 * @rdev: radeon_device pointer
 *
 * Writes 0 to RLC_CNTL, disables the GUI idle interrupt and waits for
 * the RLC SERDES masters to report idle.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
	/* clears every bit of the control register, RLC_ENABLE included */
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5822 
/**
 * si_rlc_start - start the RLC
 *
 * @rdev: radeon_device pointer
 *
 * Writes RLC_ENABLE to RLC_CNTL, enables the GUI idle interrupt and
 * then delays for 50us.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
	/* RLC_ENABLE is the only bit left set in the control register */
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5831 
5832 static bool si_lbpw_supported(struct radeon_device *rdev)
5833 {
5834     u32 tmp;
5835 
5836     /* Enable LBPW only for DDR3 */
5837     tmp = RREG32(MC_SEQ_MISC0);
5838     if ((tmp & 0xF0000000) == 0xB0000000)
5839         return true;
5840     return false;
5841 }
5842 
5843 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5844 {
5845     u32 tmp;
5846 
5847     tmp = RREG32(RLC_LB_CNTL);
5848     if (enable)
5849         tmp |= LOAD_BALANCE_ENABLE;
5850     else
5851         tmp &= ~LOAD_BALANCE_ENABLE;
5852     WREG32(RLC_LB_CNTL, tmp);
5853 
5854     if (!enable) {
5855         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5856         WREG32(SPI_LB_CU_MASK, 0x00ff);
5857     }
5858 }
5859 
5860 static int si_rlc_resume(struct radeon_device *rdev)
5861 {
5862     u32 i;
5863 
5864     if (!rdev->rlc_fw)
5865         return -EINVAL;
5866 
5867     si_rlc_stop(rdev);
5868 
5869     si_rlc_reset(rdev);
5870 
5871     si_init_pg(rdev);
5872 
5873     si_init_cg(rdev);
5874 
5875     WREG32(RLC_RL_BASE, 0);
5876     WREG32(RLC_RL_SIZE, 0);
5877     WREG32(RLC_LB_CNTL, 0);
5878     WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5879     WREG32(RLC_LB_CNTR_INIT, 0);
5880     WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5881 
5882     WREG32(RLC_MC_CNTL, 0);
5883     WREG32(RLC_UCODE_CNTL, 0);
5884 
5885     if (rdev->new_fw) {
5886         const struct rlc_firmware_header_v1_0 *hdr =
5887             (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5888         u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5889         const __le32 *fw_data = (const __le32 *)
5890             (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5891 
5892         radeon_ucode_print_rlc_hdr(&hdr->header);
5893 
5894         for (i = 0; i < fw_size; i++) {
5895             WREG32(RLC_UCODE_ADDR, i);
5896             WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5897         }
5898     } else {
5899         const __be32 *fw_data =
5900             (const __be32 *)rdev->rlc_fw->data;
5901         for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5902             WREG32(RLC_UCODE_ADDR, i);
5903             WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5904         }
5905     }
5906     WREG32(RLC_UCODE_ADDR, 0);
5907 
5908     si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5909 
5910     si_rlc_start(rdev);
5911 
5912     return 0;
5913 }
5914 
5915 static void si_enable_interrupts(struct radeon_device *rdev)
5916 {
5917     u32 ih_cntl = RREG32(IH_CNTL);
5918     u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5919 
5920     ih_cntl |= ENABLE_INTR;
5921     ih_rb_cntl |= IH_RB_ENABLE;
5922     WREG32(IH_CNTL, ih_cntl);
5923     WREG32(IH_RB_CNTL, ih_rb_cntl);
5924     rdev->ih.enabled = true;
5925 }
5926 
5927 static void si_disable_interrupts(struct radeon_device *rdev)
5928 {
5929     u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5930     u32 ih_cntl = RREG32(IH_CNTL);
5931 
5932     ih_rb_cntl &= ~IH_RB_ENABLE;
5933     ih_cntl &= ~ENABLE_INTR;
5934     WREG32(IH_RB_CNTL, ih_rb_cntl);
5935     WREG32(IH_CNTL, ih_cntl);
5936     /* set rptr, wptr to 0 */
5937     WREG32(IH_RB_RPTR, 0);
5938     WREG32(IH_RB_WPTR, 0);
5939     rdev->ih.enabled = false;
5940     rdev->ih.rptr = 0;
5941 }
5942 
5943 static void si_disable_interrupt_state(struct radeon_device *rdev)
5944 {
5945     int i;
5946     u32 tmp;
5947 
5948     tmp = RREG32(CP_INT_CNTL_RING0) &
5949         (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5950     WREG32(CP_INT_CNTL_RING0, tmp);
5951     WREG32(CP_INT_CNTL_RING1, 0);
5952     WREG32(CP_INT_CNTL_RING2, 0);
5953     tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5954     WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5955     tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5956     WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5957     WREG32(GRBM_INT_CNTL, 0);
5958     WREG32(SRBM_INT_CNTL, 0);
5959     for (i = 0; i < rdev->num_crtc; i++)
5960         WREG32(INT_MASK + crtc_offsets[i], 0);
5961     for (i = 0; i < rdev->num_crtc; i++)
5962         WREG32(GRPH_INT_CONTROL + crtc_offsets[i], 0);
5963 
5964     if (!ASIC_IS_NODCE(rdev)) {
5965         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5966 
5967         for (i = 0; i < 6; i++)
5968             WREG32_AND(DC_HPDx_INT_CONTROL(i),
5969                    DC_HPDx_INT_POLARITY);
5970     }
5971 }
5972 
5973 static int si_irq_init(struct radeon_device *rdev)
5974 {
5975     int ret = 0;
5976     int rb_bufsz;
5977     u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5978 
5979     /* allocate ring */
5980     ret = r600_ih_ring_alloc(rdev);
5981     if (ret)
5982         return ret;
5983 
5984     /* disable irqs */
5985     si_disable_interrupts(rdev);
5986 
5987     /* init rlc */
5988     ret = si_rlc_resume(rdev);
5989     if (ret) {
5990         r600_ih_ring_fini(rdev);
5991         return ret;
5992     }
5993 
5994     /* setup interrupt control */
5995     /* set dummy read address to dummy page address */
5996     WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
5997     interrupt_cntl = RREG32(INTERRUPT_CNTL);
5998     /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5999      * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6000      */
6001     interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6002     /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6003     interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6004     WREG32(INTERRUPT_CNTL, interrupt_cntl);
6005 
6006     WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6007     rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6008 
6009     ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6010               IH_WPTR_OVERFLOW_CLEAR |
6011               (rb_bufsz << 1));
6012 
6013     if (rdev->wb.enabled)
6014         ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6015 
6016     /* set the writeback address whether it's enabled or not */
6017     WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6018     WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6019 
6020     WREG32(IH_RB_CNTL, ih_rb_cntl);
6021 
6022     /* set rptr, wptr to 0 */
6023     WREG32(IH_RB_RPTR, 0);
6024     WREG32(IH_RB_WPTR, 0);
6025 
6026     /* Default settings for IH_CNTL (disabled at first) */
6027     ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6028     /* RPTR_REARM only works if msi's are enabled */
6029     if (rdev->msi_enabled)
6030         ih_cntl |= RPTR_REARM;
6031     WREG32(IH_CNTL, ih_cntl);
6032 
6033     /* force the active interrupt state to all disabled */
6034     si_disable_interrupt_state(rdev);
6035 
6036     pci_set_master(rdev->pdev);
6037 
6038     /* enable irqs */
6039     si_enable_interrupts(rdev);
6040 
6041     return ret;
6042 }
6043 
6044 /* The order we write back each register here is important */
6045 int si_irq_set(struct radeon_device *rdev)
6046 {
6047     int i;
6048     u32 cp_int_cntl;
6049     u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6050     u32 grbm_int_cntl = 0;
6051     u32 dma_cntl, dma_cntl1;
6052     u32 thermal_int = 0;
6053 
6054     if (!rdev->irq.installed) {
6055         WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6056         return -EINVAL;
6057     }
6058     /* don't enable anything if the ih is disabled */
6059     if (!rdev->ih.enabled) {
6060         si_disable_interrupts(rdev);
6061         /* force the active interrupt state to all disabled */
6062         si_disable_interrupt_state(rdev);
6063         return 0;
6064     }
6065 
6066     cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6067         (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6068 
6069     dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6070     dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6071 
6072     thermal_int = RREG32(CG_THERMAL_INT) &
6073         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6074 
6075     /* enable CP interrupts on all rings */
6076     if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6077         DRM_DEBUG("si_irq_set: sw int gfx\n");
6078         cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6079     }
6080     if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6081         DRM_DEBUG("si_irq_set: sw int cp1\n");
6082         cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6083     }
6084     if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6085         DRM_DEBUG("si_irq_set: sw int cp2\n");
6086         cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6087     }
6088     if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6089         DRM_DEBUG("si_irq_set: sw int dma\n");
6090         dma_cntl |= TRAP_ENABLE;
6091     }
6092 
6093     if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6094         DRM_DEBUG("si_irq_set: sw int dma1\n");
6095         dma_cntl1 |= TRAP_ENABLE;
6096     }
6097 
6098     WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6099     WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6100     WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6101 
6102     WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6103     WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6104 
6105     WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6106 
6107     if (rdev->irq.dpm_thermal) {
6108         DRM_DEBUG("dpm thermal\n");
6109         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6110     }
6111 
6112     for (i = 0; i < rdev->num_crtc; i++) {
6113         radeon_irq_kms_set_irq_n_enabled(
6114             rdev, INT_MASK + crtc_offsets[i], VBLANK_INT_MASK,
6115             rdev->irq.crtc_vblank_int[i] ||
6116             atomic_read(&rdev->irq.pflip[i]), "vblank", i);
6117     }
6118 
6119     for (i = 0; i < rdev->num_crtc; i++)
6120         WREG32(GRPH_INT_CONTROL + crtc_offsets[i], GRPH_PFLIP_INT_MASK);
6121 
6122     if (!ASIC_IS_NODCE(rdev)) {
6123         for (i = 0; i < 6; i++) {
6124             radeon_irq_kms_set_irq_n_enabled(
6125                 rdev, DC_HPDx_INT_CONTROL(i),
6126                 DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN,
6127                 rdev->irq.hpd[i], "HPD", i);
6128         }
6129     }
6130 
6131     WREG32(CG_THERMAL_INT, thermal_int);
6132 
6133     /* posting read */
6134     RREG32(SRBM_STATUS);
6135 
6136     return 0;
6137 }
6138 
6139 /* The order we write back each register here is important */
6140 static inline void si_irq_ack(struct radeon_device *rdev)
6141 {
6142     int i, j;
6143     u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
6144     u32 *grph_int = rdev->irq.stat_regs.evergreen.grph_int;
6145 
6146     if (ASIC_IS_NODCE(rdev))
6147         return;
6148 
6149     for (i = 0; i < 6; i++) {
6150         disp_int[i] = RREG32(si_disp_int_status[i]);
6151         if (i < rdev->num_crtc)
6152             grph_int[i] = RREG32(GRPH_INT_STATUS + crtc_offsets[i]);
6153     }
6154 
6155     /* We write back each interrupt register in pairs of two */
6156     for (i = 0; i < rdev->num_crtc; i += 2) {
6157         for (j = i; j < (i + 2); j++) {
6158             if (grph_int[j] & GRPH_PFLIP_INT_OCCURRED)
6159                 WREG32(GRPH_INT_STATUS + crtc_offsets[j],
6160                        GRPH_PFLIP_INT_CLEAR);
6161         }
6162 
6163         for (j = i; j < (i + 2); j++) {
6164             if (disp_int[j] & LB_D1_VBLANK_INTERRUPT)
6165                 WREG32(VBLANK_STATUS + crtc_offsets[j],
6166                        VBLANK_ACK);
6167             if (disp_int[j] & LB_D1_VLINE_INTERRUPT)
6168                 WREG32(VLINE_STATUS + crtc_offsets[j],
6169                        VLINE_ACK);
6170         }
6171     }
6172 
6173     for (i = 0; i < 6; i++) {
6174         if (disp_int[i] & DC_HPD1_INTERRUPT)
6175             WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_INT_ACK);
6176     }
6177 
6178     for (i = 0; i < 6; i++) {
6179         if (disp_int[i] & DC_HPD1_RX_INTERRUPT)
6180             WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_RX_INT_ACK);
6181     }
6182 }
6183 
/**
 * si_irq_disable - disable interrupts and flush what is pending
 *
 * @rdev: radeon_device pointer
 *
 * Turns the IH off, waits one millisecond, acknowledges the display
 * interrupts and then masks every individual interrupt source.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
    si_disable_interrupts(rdev);

    /* Wait and acknowledge irq */
    mdelay(1);
    si_irq_ack(rdev);

    si_disable_interrupt_state(rdev);
}
6192 
/**
 * si_irq_suspend - quiesce interrupt handling for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disables interrupts via si_irq_disable() and then stops the RLC.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
    /* interrupts go down first, the rlc afterwards */
    si_irq_disable(rdev);
    si_rlc_stop(rdev);
}
6198 
/**
 * si_irq_fini - tear down interrupt handling
 *
 * @rdev: radeon_device pointer
 *
 * Suspends interrupt handling and releases the IH ring.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
    /* the ring may only go away once the hardware has stopped using it */
    si_irq_suspend(rdev);
    r600_ih_ring_fini(rdev);
}
6204 
6205 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6206 {
6207     u32 wptr, tmp;
6208 
6209     if (rdev->wb.enabled)
6210         wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6211     else
6212         wptr = RREG32(IH_RB_WPTR);
6213 
6214     if (wptr & RB_OVERFLOW) {
6215         wptr &= ~RB_OVERFLOW;
6216         /* When a ring buffer overflow happen start parsing interrupt
6217          * from the last not overwritten vector (wptr + 16). Hopefully
6218          * this should allow us to catchup.
6219          */
6220         dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6221              wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6222         rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6223         tmp = RREG32(IH_RB_CNTL);
6224         tmp |= IH_WPTR_OVERFLOW_CLEAR;
6225         WREG32(IH_RB_CNTL, tmp);
6226     }
6227     return (wptr & rdev->ih.ptr_mask);
6228 }
6229 
6230 /*        SI IV Ring
6231  * Each IV ring entry is 128 bits:
6232  * [7:0]    - interrupt source id
6233  * [31:8]   - reserved
6234  * [59:32]  - interrupt source data
6235  * [63:60]  - reserved
6236  * [71:64]  - RINGID
6237  * [79:72]  - VMID
6238  * [127:80] - reserved
6239  */
6240 int si_irq_process(struct radeon_device *rdev)
6241 {
6242     u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
6243     u32 crtc_idx, hpd_idx;
6244     u32 mask;
6245     u32 wptr;
6246     u32 rptr;
6247     u32 src_id, src_data, ring_id;
6248     u32 ring_index;
6249     bool queue_hotplug = false;
6250     bool queue_dp = false;
6251     bool queue_thermal = false;
6252     u32 status, addr;
6253     const char *event_name;
6254 
6255     if (!rdev->ih.enabled || rdev->shutdown)
6256         return IRQ_NONE;
6257 
6258     wptr = si_get_ih_wptr(rdev);
6259 
6260 restart_ih:
6261     /* is somebody else already processing irqs? */
6262     if (atomic_xchg(&rdev->ih.lock, 1))
6263         return IRQ_NONE;
6264 
6265     rptr = rdev->ih.rptr;
6266     DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6267 
6268     /* Order reading of wptr vs. reading of IH ring data */
6269     rmb();
6270 
6271     /* display interrupts */
6272     si_irq_ack(rdev);
6273 
6274     while (rptr != wptr) {
6275         /* wptr/rptr are in bytes! */
6276         ring_index = rptr / 4;
6277         src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6278         src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6279         ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6280 
6281         switch (src_id) {
6282         case 1: /* D1 vblank/vline */
6283         case 2: /* D2 vblank/vline */
6284         case 3: /* D3 vblank/vline */
6285         case 4: /* D4 vblank/vline */
6286         case 5: /* D5 vblank/vline */
6287         case 6: /* D6 vblank/vline */
6288             crtc_idx = src_id - 1;
6289 
6290             if (src_data == 0) { /* vblank */
6291                 mask = LB_D1_VBLANK_INTERRUPT;
6292                 event_name = "vblank";
6293 
6294                 if (rdev->irq.crtc_vblank_int[crtc_idx]) {
6295                     drm_handle_vblank(rdev->ddev, crtc_idx);
6296                     rdev->pm.vblank_sync = true;
6297                     wake_up(&rdev->irq.vblank_queue);
6298                 }
6299                 if (atomic_read(&rdev->irq.pflip[crtc_idx])) {
6300                     radeon_crtc_handle_vblank(rdev,
6301                                   crtc_idx);
6302                 }
6303 
6304             } else if (src_data == 1) { /* vline */
6305                 mask = LB_D1_VLINE_INTERRUPT;
6306                 event_name = "vline";
6307             } else {
6308                 DRM_DEBUG("Unhandled interrupt: %d %d\n",
6309                       src_id, src_data);
6310                 break;
6311             }
6312 
6313             if (!(disp_int[crtc_idx] & mask)) {
6314                 DRM_DEBUG("IH: D%d %s - IH event w/o asserted irq bit?\n",
6315                       crtc_idx + 1, event_name);
6316             }
6317 
6318             disp_int[crtc_idx] &= ~mask;
6319             DRM_DEBUG("IH: D%d %s\n", crtc_idx + 1, event_name);
6320 
6321             break;
6322         case 8: /* D1 page flip */
6323         case 10: /* D2 page flip */
6324         case 12: /* D3 page flip */
6325         case 14: /* D4 page flip */
6326         case 16: /* D5 page flip */
6327         case 18: /* D6 page flip */
6328             DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6329             if (radeon_use_pflipirq > 0)
6330                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6331             break;
6332         case 42: /* HPD hotplug */
6333             if (src_data <= 5) {
6334                 hpd_idx = src_data;
6335                 mask = DC_HPD1_INTERRUPT;
6336                 queue_hotplug = true;
6337                 event_name = "HPD";
6338 
6339             } else if (src_data <= 11) {
6340                 hpd_idx = src_data - 6;
6341                 mask = DC_HPD1_RX_INTERRUPT;
6342                 queue_dp = true;
6343                 event_name = "HPD_RX";
6344 
6345             } else {
6346                 DRM_DEBUG("Unhandled interrupt: %d %d\n",
6347                       src_id, src_data);
6348                 break;
6349             }
6350 
6351             if (!(disp_int[hpd_idx] & mask))
6352                 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6353 
6354             disp_int[hpd_idx] &= ~mask;
6355             DRM_DEBUG("IH: %s%d\n", event_name, hpd_idx + 1);
6356             break;
6357         case 96:
6358             DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6359             WREG32(SRBM_INT_ACK, 0x1);
6360             break;
6361         case 124: /* UVD */
6362             DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6363             radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6364             break;
6365         case 146:
6366         case 147:
6367             addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6368             status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6369             /* reset addr and status */
6370             WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6371             if (addr == 0x0 && status == 0x0)
6372                 break;
6373             dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6374             dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6375                 addr);
6376             dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6377                 status);
6378             si_vm_decode_fault(rdev, status, addr);
6379             break;
6380         case 176: /* RINGID0 CP_INT */
6381             radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6382             break;
6383         case 177: /* RINGID1 CP_INT */
6384             radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6385             break;
6386         case 178: /* RINGID2 CP_INT */
6387             radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6388             break;
6389         case 181: /* CP EOP event */
6390             DRM_DEBUG("IH: CP EOP\n");
6391             switch (ring_id) {
6392             case 0:
6393                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6394                 break;
6395             case 1:
6396                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6397                 break;
6398             case 2:
6399                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6400                 break;
6401             }
6402             break;
6403         case 224: /* DMA trap event */
6404             DRM_DEBUG("IH: DMA trap\n");
6405             radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6406             break;
6407         case 230: /* thermal low to high */
6408             DRM_DEBUG("IH: thermal low to high\n");
6409             rdev->pm.dpm.thermal.high_to_low = false;
6410             queue_thermal = true;
6411             break;
6412         case 231: /* thermal high to low */
6413             DRM_DEBUG("IH: thermal high to low\n");
6414             rdev->pm.dpm.thermal.high_to_low = true;
6415             queue_thermal = true;
6416             break;
6417         case 233: /* GUI IDLE */
6418             DRM_DEBUG("IH: GUI idle\n");
6419             break;
6420         case 244: /* DMA trap event */
6421             DRM_DEBUG("IH: DMA1 trap\n");
6422             radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6423             break;
6424         default:
6425             DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6426             break;
6427         }
6428 
6429         /* wptr/rptr are in bytes! */
6430         rptr += 16;
6431         rptr &= rdev->ih.ptr_mask;
6432         WREG32(IH_RB_RPTR, rptr);
6433     }
6434     if (queue_dp)
6435         schedule_work(&rdev->dp_work);
6436     if (queue_hotplug)
6437         schedule_delayed_work(&rdev->hotplug_work, 0);
6438     if (queue_thermal && rdev->pm.dpm_enabled)
6439         schedule_work(&rdev->pm.dpm.thermal.work);
6440     rdev->ih.rptr = rptr;
6441     atomic_set(&rdev->ih.lock, 0);
6442 
6443     /* make sure wptr hasn't changed while processing */
6444     wptr = si_get_ih_wptr(rdev);
6445     if (wptr != rptr)
6446         goto restart_ih;
6447 
6448     return IRQ_HANDLED;
6449 }
6450 
6451 /*
6452  * startup/shutdown callbacks
6453  */
6454 static void si_uvd_init(struct radeon_device *rdev)
6455 {
6456     int r;
6457 
6458     if (!rdev->has_uvd)
6459         return;
6460 
6461     r = radeon_uvd_init(rdev);
6462     if (r) {
6463         dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6464         /*
6465          * At this point rdev->uvd.vcpu_bo is NULL which trickles down
6466          * to early fails uvd_v2_2_resume() and thus nothing happens
6467          * there. So it is pointless to try to go through that code
6468          * hence why we disable uvd here.
6469          */
6470         rdev->has_uvd = false;
6471         return;
6472     }
6473     rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6474     r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6475 }
6476 
6477 static void si_uvd_start(struct radeon_device *rdev)
6478 {
6479     int r;
6480 
6481     if (!rdev->has_uvd)
6482         return;
6483 
6484     r = uvd_v2_2_resume(rdev);
6485     if (r) {
6486         dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6487         goto error;
6488     }
6489     r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6490     if (r) {
6491         dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6492         goto error;
6493     }
6494     return;
6495 
6496 error:
6497     rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6498 }
6499 
6500 static void si_uvd_resume(struct radeon_device *rdev)
6501 {
6502     struct radeon_ring *ring;
6503     int r;
6504 
6505     if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6506         return;
6507 
6508     ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6509     r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6510     if (r) {
6511         dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6512         return;
6513     }
6514     r = uvd_v1_0_init(rdev);
6515     if (r) {
6516         dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6517         return;
6518     }
6519 }
6520 
6521 static void si_vce_init(struct radeon_device *rdev)
6522 {
6523     int r;
6524 
6525     if (!rdev->has_vce)
6526         return;
6527 
6528     r = radeon_vce_init(rdev);
6529     if (r) {
6530         dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6531         /*
6532          * At this point rdev->vce.vcpu_bo is NULL which trickles down
6533          * to early fails si_vce_start() and thus nothing happens
6534          * there. So it is pointless to try to go through that code
6535          * hence why we disable vce here.
6536          */
6537         rdev->has_vce = false;
6538         return;
6539     }
6540     rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6541     r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6542     rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6543     r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6544 }
6545 
6546 static void si_vce_start(struct radeon_device *rdev)
6547 {
6548     int r;
6549 
6550     if (!rdev->has_vce)
6551         return;
6552 
6553     r = radeon_vce_resume(rdev);
6554     if (r) {
6555         dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6556         goto error;
6557     }
6558     r = vce_v1_0_resume(rdev);
6559     if (r) {
6560         dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6561         goto error;
6562     }
6563     r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6564     if (r) {
6565         dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6566         goto error;
6567     }
6568     r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6569     if (r) {
6570         dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6571         goto error;
6572     }
6573     return;
6574 
6575 error:
6576     rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6577     rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6578 }
6579 
6580 static void si_vce_resume(struct radeon_device *rdev)
6581 {
6582     struct radeon_ring *ring;
6583     int r;
6584 
6585     if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6586         return;
6587 
6588     ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
6589     r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6590     if (r) {
6591         dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6592         return;
6593     }
6594     ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
6595     r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6596     if (r) {
6597         dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6598         return;
6599     }
6600     r = vce_v1_0_init(rdev);
6601     if (r) {
6602         dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
6603         return;
6604     }
6605 }
6606 
6607 static int si_startup(struct radeon_device *rdev)
6608 {
6609     struct radeon_ring *ring;
6610     int r;
6611 
6612     /* enable pcie gen2/3 link */
6613     si_pcie_gen3_enable(rdev);
6614     /* enable aspm */
6615     si_program_aspm(rdev);
6616 
6617     /* scratch needs to be initialized before MC */
6618     r = r600_vram_scratch_init(rdev);
6619     if (r)
6620         return r;
6621 
6622     si_mc_program(rdev);
6623 
6624     if (!rdev->pm.dpm_enabled) {
6625         r = si_mc_load_microcode(rdev);
6626         if (r) {
6627             DRM_ERROR("Failed to load MC firmware!\n");
6628             return r;
6629         }
6630     }
6631 
6632     r = si_pcie_gart_enable(rdev);
6633     if (r)
6634         return r;
6635     si_gpu_init(rdev);
6636 
6637     /* allocate rlc buffers */
6638     if (rdev->family == CHIP_VERDE) {
6639         rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6640         rdev->rlc.reg_list_size =
6641             (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6642     }
6643     rdev->rlc.cs_data = si_cs_data;
6644     r = sumo_rlc_init(rdev);
6645     if (r) {
6646         DRM_ERROR("Failed to init rlc BOs!\n");
6647         return r;
6648     }
6649 
6650     /* allocate wb buffer */
6651     r = radeon_wb_init(rdev);
6652     if (r)
6653         return r;
6654 
6655     r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6656     if (r) {
6657         dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6658         return r;
6659     }
6660 
6661     r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6662     if (r) {
6663         dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6664         return r;
6665     }
6666 
6667     r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6668     if (r) {
6669         dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6670         return r;
6671     }
6672 
6673     r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6674     if (r) {
6675         dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6676         return r;
6677     }
6678 
6679     r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6680     if (r) {
6681         dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6682         return r;
6683     }
6684 
6685     si_uvd_start(rdev);
6686     si_vce_start(rdev);
6687 
6688     /* Enable IRQ */
6689     if (!rdev->irq.installed) {
6690         r = radeon_irq_kms_init(rdev);
6691         if (r)
6692             return r;
6693     }
6694 
6695     r = si_irq_init(rdev);
6696     if (r) {
6697         DRM_ERROR("radeon: IH init failed (%d).\n", r);
6698         radeon_irq_kms_fini(rdev);
6699         return r;
6700     }
6701     si_irq_set(rdev);
6702 
6703     ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6704     r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6705                  RADEON_CP_PACKET2);
6706     if (r)
6707         return r;
6708 
6709     ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6710     r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6711                  RADEON_CP_PACKET2);
6712     if (r)
6713         return r;
6714 
6715     ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6716     r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6717                  RADEON_CP_PACKET2);
6718     if (r)
6719         return r;
6720 
6721     ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6722     r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6723                  DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6724     if (r)
6725         return r;
6726 
6727     ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6728     r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6729                  DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6730     if (r)
6731         return r;
6732 
6733     r = si_cp_load_microcode(rdev);
6734     if (r)
6735         return r;
6736     r = si_cp_resume(rdev);
6737     if (r)
6738         return r;
6739 
6740     r = cayman_dma_resume(rdev);
6741     if (r)
6742         return r;
6743 
6744     si_uvd_resume(rdev);
6745     si_vce_resume(rdev);
6746 
6747     r = radeon_ib_pool_init(rdev);
6748     if (r) {
6749         dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6750         return r;
6751     }
6752 
6753     r = radeon_vm_manager_init(rdev);
6754     if (r) {
6755         dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6756         return r;
6757     }
6758 
6759     r = radeon_audio_init(rdev);
6760     if (r)
6761         return r;
6762 
6763     return 0;
6764 }
6765 
6766 int si_resume(struct radeon_device *rdev)
6767 {
6768     int r;
6769 
6770     /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6771      * posting will perform necessary task to bring back GPU into good
6772      * shape.
6773      */
6774     /* post card */
6775     atom_asic_init(rdev->mode_info.atom_context);
6776 
6777     /* init golden registers */
6778     si_init_golden_registers(rdev);
6779 
6780     if (rdev->pm.pm_method == PM_METHOD_DPM)
6781         radeon_pm_resume(rdev);
6782 
6783     rdev->accel_working = true;
6784     r = si_startup(rdev);
6785     if (r) {
6786         DRM_ERROR("si startup failed on resume\n");
6787         rdev->accel_working = false;
6788         return r;
6789     }
6790 
6791     return r;
6792 
6793 }
6794 
6795 int si_suspend(struct radeon_device *rdev)
6796 {
6797     radeon_pm_suspend(rdev);
6798     radeon_audio_fini(rdev);
6799     radeon_vm_manager_fini(rdev);
6800     si_cp_enable(rdev, false);
6801     cayman_dma_stop(rdev);
6802     if (rdev->has_uvd) {
6803         radeon_uvd_suspend(rdev);
6804         uvd_v1_0_fini(rdev);
6805     }
6806     if (rdev->has_vce)
6807         radeon_vce_suspend(rdev);
6808     si_fini_pg(rdev);
6809     si_fini_cg(rdev);
6810     si_irq_suspend(rdev);
6811     radeon_wb_disable(rdev);
6812     si_pcie_gart_disable(rdev);
6813     return 0;
6814 }
6815 
6816 /* Plan is to move initialization in that function and use
6817  * helper function so that radeon_device_init pretty much
6818  * do nothing more than calling asic specific function. This
6819  * should also allow to remove a bunch of callback function
6820  * like vram_info.
6821  */
6822 int si_init(struct radeon_device *rdev)
6823 {
6824     struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6825     int r;
6826 
6827     /* Read BIOS */
6828     if (!radeon_get_bios(rdev)) {
6829         if (ASIC_IS_AVIVO(rdev))
6830             return -EINVAL;
6831     }
6832     /* Must be an ATOMBIOS */
6833     if (!rdev->is_atom_bios) {
6834         dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6835         return -EINVAL;
6836     }
6837     r = radeon_atombios_init(rdev);
6838     if (r)
6839         return r;
6840 
6841     /* Post card if necessary */
6842     if (!radeon_card_posted(rdev)) {
6843         if (!rdev->bios) {
6844             dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6845             return -EINVAL;
6846         }
6847         DRM_INFO("GPU not posted. posting now...\n");
6848         atom_asic_init(rdev->mode_info.atom_context);
6849     }
6850     /* init golden registers */
6851     si_init_golden_registers(rdev);
6852     /* Initialize scratch registers */
6853     si_scratch_init(rdev);
6854     /* Initialize surface registers */
6855     radeon_surface_init(rdev);
6856     /* Initialize clocks */
6857     radeon_get_clock_info(rdev->ddev);
6858 
6859     /* Fence driver */
6860     radeon_fence_driver_init(rdev);
6861 
6862     /* initialize memory controller */
6863     r = si_mc_init(rdev);
6864     if (r)
6865         return r;
6866     /* Memory manager */
6867     r = radeon_bo_init(rdev);
6868     if (r)
6869         return r;
6870 
6871     if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6872         !rdev->rlc_fw || !rdev->mc_fw) {
6873         r = si_init_microcode(rdev);
6874         if (r) {
6875             DRM_ERROR("Failed to load firmware!\n");
6876             return r;
6877         }
6878     }
6879 
6880     /* Initialize power management */
6881     radeon_pm_init(rdev);
6882 
6883     ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6884     ring->ring_obj = NULL;
6885     r600_ring_init(rdev, ring, 1024 * 1024);
6886 
6887     ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6888     ring->ring_obj = NULL;
6889     r600_ring_init(rdev, ring, 1024 * 1024);
6890 
6891     ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6892     ring->ring_obj = NULL;
6893     r600_ring_init(rdev, ring, 1024 * 1024);
6894 
6895     ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6896     ring->ring_obj = NULL;
6897     r600_ring_init(rdev, ring, 64 * 1024);
6898 
6899     ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6900     ring->ring_obj = NULL;
6901     r600_ring_init(rdev, ring, 64 * 1024);
6902 
6903     si_uvd_init(rdev);
6904     si_vce_init(rdev);
6905 
6906     rdev->ih.ring_obj = NULL;
6907     r600_ih_ring_init(rdev, 64 * 1024);
6908 
6909     r = r600_pcie_gart_init(rdev);
6910     if (r)
6911         return r;
6912 
6913     rdev->accel_working = true;
6914     r = si_startup(rdev);
6915     if (r) {
6916         dev_err(rdev->dev, "disabling GPU acceleration\n");
6917         si_cp_fini(rdev);
6918         cayman_dma_fini(rdev);
6919         si_irq_fini(rdev);
6920         sumo_rlc_fini(rdev);
6921         radeon_wb_fini(rdev);
6922         radeon_ib_pool_fini(rdev);
6923         radeon_vm_manager_fini(rdev);
6924         radeon_irq_kms_fini(rdev);
6925         si_pcie_gart_fini(rdev);
6926         rdev->accel_working = false;
6927     }
6928 
6929     /* Don't start up if the MC ucode is missing.
6930      * The default clocks and voltages before the MC ucode
6931      * is loaded are not suffient for advanced operations.
6932      */
6933     if (!rdev->mc_fw) {
6934         DRM_ERROR("radeon: MC ucode required for NI+.\n");
6935         return -EINVAL;
6936     }
6937 
6938     return 0;
6939 }
6940 
/**
 * si_fini - tear down the asic specific driver state (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Runs the fini helper of every sub-block in the order listed below and
 * finally frees the BIOS copy held in rdev->bios.
 */
void si_fini(struct radeon_device *rdev)
{
    radeon_pm_fini(rdev);
    si_cp_fini(rdev);
    cayman_dma_fini(rdev);
    si_fini_pg(rdev);
    si_fini_cg(rdev);
    si_irq_fini(rdev);
    sumo_rlc_fini(rdev);
    radeon_wb_fini(rdev);
    radeon_vm_manager_fini(rdev);
    radeon_ib_pool_fini(rdev);
    radeon_irq_kms_fini(rdev);
    /* UVD and VCE are only torn down when the device flags them as present */
    if (rdev->has_uvd) {
        uvd_v1_0_fini(rdev);
        radeon_uvd_fini(rdev);
    }
    if (rdev->has_vce)
        radeon_vce_fini(rdev);
    si_pcie_gart_fini(rdev);
    r600_vram_scratch_fini(rdev);
    radeon_gem_fini(rdev);
    radeon_fence_driver_fini(rdev);
    radeon_bo_fini(rdev);
    radeon_atombios_fini(rdev);
    /* drop the BIOS image and clear the pointer so it cannot be reused */
    kfree(rdev->bios);
    rdev->bios = NULL;
}
6969 
6970 /**
6971  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6972  *
6973  * @rdev: radeon_device pointer
6974  *
6975  * Fetches a GPU clock counter snapshot (SI).
6976  * Returns the 64 bit clock counter snapshot.
6977  */
6978 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6979 {
6980     uint64_t clock;
6981 
6982     mutex_lock(&rdev->gpu_clock_mutex);
6983     WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6984     clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6985         ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6986     mutex_unlock(&rdev->gpu_clock_mutex);
6987     return clock;
6988 }
6989 
/**
 * si_set_uvd_clocks - program the UPLL for the requested UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested VCLK, 0 to leave the PLL in bypass
 * @dclk: requested DCLK, 0 to leave the PLL in bypass
 *
 * Switches VCLK/DCLK to the bypass clock, puts the PLL in bypass mode and,
 * when both clocks are non-zero, reprograms the dividers computed by
 * radeon_uvd_calc_upll_dividers() before switching back to the PLL output.
 * Returns 0 on success, or the error returned by
 * radeon_uvd_calc_upll_dividers() / radeon_uvd_send_upll_ctlreq().
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
    unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
    int r;

    /* bypass vclk and dclk with bclk */
    WREG32_P(CG_UPLL_FUNC_CNTL_2,
        VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
        ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

    /* put PLL in bypass mode */
    WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

    if (!vclk || !dclk) {
        /* keep the Bypass mode */
        return 0;
    }

    r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
                      16384, 0x03FFFFFF, 0, 128, 5,
                      &fb_div, &vclk_div, &dclk_div);
    if (r)
        return r;

    /* set RESET_ANTI_MUX to 0 */
    WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

    /* set VCO_MODE to 1 */
    WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

    /* disable sleep mode */
    WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

    /* deassert UPLL_RESET */
    WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

    mdelay(1);

    r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
    if (r)
        return r;

    /* assert UPLL_RESET again */
    WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

    /* disable spread spectrum. */
    WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

    /* set feedback divider */
    WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

    /* set ref divider to 0 */
    WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

    /* ISPARE9 is cleared for feedback dividers below 307200, set otherwise */
    if (fb_div < 307200)
        WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
    else
        WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

    /* set PDIV_A and PDIV_B */
    WREG32_P(CG_UPLL_FUNC_CNTL_2,
        UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
        ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

    /* give the PLL some time to settle */
    mdelay(15);

    /* deassert PLL_RESET */
    WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

    mdelay(15);

    /* switch from bypass mode to normal mode */
    WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

    r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
    if (r)
        return r;

    /* switch VCLK and DCLK selection */
    WREG32_P(CG_UPLL_FUNC_CNTL_2,
        VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
        ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

    mdelay(100);

    return 0;
}
7078 
7079 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7080 {
7081     struct pci_dev *root = rdev->pdev->bus->self;
7082     enum pci_bus_speed speed_cap;
7083     u32 speed_cntl, current_data_rate;
7084     int i;
7085     u16 tmp16;
7086 
7087     if (pci_is_root_bus(rdev->pdev->bus))
7088         return;
7089 
7090     if (radeon_pcie_gen2 == 0)
7091         return;
7092 
7093     if (rdev->flags & RADEON_IS_IGP)
7094         return;
7095 
7096     if (!(rdev->flags & RADEON_IS_PCIE))
7097         return;
7098 
7099     speed_cap = pcie_get_speed_cap(root);
7100     if (speed_cap == PCI_SPEED_UNKNOWN)
7101         return;
7102 
7103     if ((speed_cap != PCIE_SPEED_8_0GT) &&
7104         (speed_cap != PCIE_SPEED_5_0GT))
7105         return;
7106 
7107     speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7108     current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7109         LC_CURRENT_DATA_RATE_SHIFT;
7110     if (speed_cap == PCIE_SPEED_8_0GT) {
7111         if (current_data_rate == 2) {
7112             DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7113             return;
7114         }
7115         DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7116     } else if (speed_cap == PCIE_SPEED_5_0GT) {
7117         if (current_data_rate == 1) {
7118             DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7119             return;
7120         }
7121         DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7122     }
7123 
7124     if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
7125         return;
7126 
7127     if (speed_cap == PCIE_SPEED_8_0GT) {
7128         /* re-try equalization if gen3 is not already enabled */
7129         if (current_data_rate != 2) {
7130             u16 bridge_cfg, gpu_cfg;
7131             u16 bridge_cfg2, gpu_cfg2;
7132             u32 max_lw, current_lw, tmp;
7133 
7134             pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7135                           &bridge_cfg);
7136             pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
7137                           &gpu_cfg);
7138 
7139             tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7140             pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
7141 
7142             tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7143             pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
7144                            tmp16);
7145 
7146             tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7147             max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7148             current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7149 
7150             if (current_lw < max_lw) {
7151                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7152                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
7153                     tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7154                     tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7155                     tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7156                     WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7157                 }
7158             }
7159 
7160             for (i = 0; i < 10; i++) {
7161                 /* check status */
7162                 pcie_capability_read_word(rdev->pdev,
7163                               PCI_EXP_DEVSTA,
7164                               &tmp16);
7165                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7166                     break;
7167 
7168                 pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7169                               &bridge_cfg);
7170                 pcie_capability_read_word(rdev->pdev,
7171                               PCI_EXP_LNKCTL,
7172                               &gpu_cfg);
7173 
7174                 pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
7175                               &bridge_cfg2);
7176                 pcie_capability_read_word(rdev->pdev,
7177                               PCI_EXP_LNKCTL2,
7178                               &gpu_cfg2);
7179 
7180                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7181                 tmp |= LC_SET_QUIESCE;
7182                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7183 
7184                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7185                 tmp |= LC_REDO_EQ;
7186                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7187 
7188                 msleep(100);
7189 
7190                 /* linkctl */
7191                 pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7192                               &tmp16);
7193                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7194                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7195                 pcie_capability_write_word(root,
7196                                PCI_EXP_LNKCTL,
7197                                tmp16);
7198 
7199                 pcie_capability_read_word(rdev->pdev,
7200                               PCI_EXP_LNKCTL,
7201                               &tmp16);
7202                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7203                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7204                 pcie_capability_write_word(rdev->pdev,
7205                                PCI_EXP_LNKCTL,
7206                                tmp16);
7207 
7208                 /* linkctl2 */
7209                 pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
7210                               &tmp16);
7211                 tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
7212                        PCI_EXP_LNKCTL2_TX_MARGIN);
7213                 tmp16 |= (bridge_cfg2 &
7214                       (PCI_EXP_LNKCTL2_ENTER_COMP |
7215                        PCI_EXP_LNKCTL2_TX_MARGIN));
7216                 pcie_capability_write_word(root,
7217                                PCI_EXP_LNKCTL2,
7218                                tmp16);
7219 
7220                 pcie_capability_read_word(rdev->pdev,
7221                               PCI_EXP_LNKCTL2,
7222                               &tmp16);
7223                 tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
7224                        PCI_EXP_LNKCTL2_TX_MARGIN);
7225                 tmp16 |= (gpu_cfg2 &
7226                       (PCI_EXP_LNKCTL2_ENTER_COMP |
7227                        PCI_EXP_LNKCTL2_TX_MARGIN));
7228                 pcie_capability_write_word(rdev->pdev,
7229                                PCI_EXP_LNKCTL2,
7230                                tmp16);
7231 
7232                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7233                 tmp &= ~LC_SET_QUIESCE;
7234                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7235             }
7236         }
7237     }
7238 
7239     /* set the link speed */
7240     speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7241     speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7242     WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7243 
7244     pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
7245     tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
7246     if (speed_cap == PCIE_SPEED_8_0GT)
7247         tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
7248     else if (speed_cap == PCIE_SPEED_5_0GT)
7249         tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
7250     else
7251         tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
7252     pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);
7253 
7254     speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7255     speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7256     WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7257 
7258     for (i = 0; i < rdev->usec_timeout; i++) {
7259         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7260         if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7261             break;
7262         udelay(1);
7263     }
7264 }
7265 
7266 static void si_program_aspm(struct radeon_device *rdev)
7267 {
7268     u32 data, orig;
7269     bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7270     bool disable_clkreq = false;
7271 
7272     if (radeon_aspm == 0)
7273         return;
7274 
7275     if (!(rdev->flags & RADEON_IS_PCIE))
7276         return;
7277 
7278     orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7279     data &= ~LC_XMIT_N_FTS_MASK;
7280     data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7281     if (orig != data)
7282         WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7283 
7284     orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7285     data |= LC_GO_TO_RECOVERY;
7286     if (orig != data)
7287         WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7288 
7289     orig = data = RREG32_PCIE(PCIE_P_CNTL);
7290     data |= P_IGNORE_EDB_ERR;
7291     if (orig != data)
7292         WREG32_PCIE(PCIE_P_CNTL, data);
7293 
7294     orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7295     data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7296     data |= LC_PMI_TO_L1_DIS;
7297     if (!disable_l0s)
7298         data |= LC_L0S_INACTIVITY(7);
7299 
7300     if (!disable_l1) {
7301         data |= LC_L1_INACTIVITY(7);
7302         data &= ~LC_PMI_TO_L1_DIS;
7303         if (orig != data)
7304             WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7305 
7306         if (!disable_plloff_in_l1) {
7307             bool clk_req_support;
7308 
7309             orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7310             data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7311             data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7312             if (orig != data)
7313                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7314 
7315             orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7316             data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7317             data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7318             if (orig != data)
7319                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7320 
7321             orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7322             data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7323             data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7324             if (orig != data)
7325                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7326 
7327             orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7328             data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7329             data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7330             if (orig != data)
7331                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7332 
7333             if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7334                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7335                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7336                 if (orig != data)
7337                     WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7338 
7339                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7340                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7341                 if (orig != data)
7342                     WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7343 
7344                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7345                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7346                 if (orig != data)
7347                     WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7348 
7349                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7350                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7351                 if (orig != data)
7352                     WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7353 
7354                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7355                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7356                 if (orig != data)
7357                     WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7358 
7359                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7360                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7361                 if (orig != data)
7362                     WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7363 
7364                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7365                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7366                 if (orig != data)
7367                     WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7368 
7369                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7370                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7371                 if (orig != data)
7372                     WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7373             }
7374             orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7375             data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7376             data |= LC_DYN_LANES_PWR_STATE(3);
7377             if (orig != data)
7378                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7379 
7380             orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7381             data &= ~LS2_EXIT_TIME_MASK;
7382             if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7383                 data |= LS2_EXIT_TIME(5);
7384             if (orig != data)
7385                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7386 
7387             orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7388             data &= ~LS2_EXIT_TIME_MASK;
7389             if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7390                 data |= LS2_EXIT_TIME(5);
7391             if (orig != data)
7392                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7393 
7394             if (!disable_clkreq &&
7395                 !pci_is_root_bus(rdev->pdev->bus)) {
7396                 struct pci_dev *root = rdev->pdev->bus->self;
7397                 u32 lnkcap;
7398 
7399                 clk_req_support = false;
7400                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7401                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7402                     clk_req_support = true;
7403             } else {
7404                 clk_req_support = false;
7405             }
7406 
7407             if (clk_req_support) {
7408                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7409                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7410                 if (orig != data)
7411                     WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7412 
7413                 orig = data = RREG32(THM_CLK_CNTL);
7414                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7415                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7416                 if (orig != data)
7417                     WREG32(THM_CLK_CNTL, data);
7418 
7419                 orig = data = RREG32(MISC_CLK_CNTL);
7420                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7421                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7422                 if (orig != data)
7423                     WREG32(MISC_CLK_CNTL, data);
7424 
7425                 orig = data = RREG32(CG_CLKPIN_CNTL);
7426                 data &= ~BCLK_AS_XCLK;
7427                 if (orig != data)
7428                     WREG32(CG_CLKPIN_CNTL, data);
7429 
7430                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
7431                 data &= ~FORCE_BIF_REFCLK_EN;
7432                 if (orig != data)
7433                     WREG32(CG_CLKPIN_CNTL_2, data);
7434 
7435                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7436                 data &= ~MPLL_CLKOUT_SEL_MASK;
7437                 data |= MPLL_CLKOUT_SEL(4);
7438                 if (orig != data)
7439                     WREG32(MPLL_BYPASSCLK_SEL, data);
7440 
7441                 orig = data = RREG32(SPLL_CNTL_MODE);
7442                 data &= ~SPLL_REFCLK_SEL_MASK;
7443                 if (orig != data)
7444                     WREG32(SPLL_CNTL_MODE, data);
7445             }
7446         }
7447     } else {
7448         if (orig != data)
7449             WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7450     }
7451 
7452     orig = data = RREG32_PCIE(PCIE_CNTL2);
7453     data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7454     if (orig != data)
7455         WREG32_PCIE(PCIE_CNTL2, data);
7456 
7457     if (!disable_l0s) {
7458         data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7459         if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7460             data = RREG32_PCIE(PCIE_LC_STATUS1);
7461             if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7462                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7463                 data &= ~LC_L0S_INACTIVITY_MASK;
7464                 if (orig != data)
7465                     WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7466             }
7467         }
7468     }
7469 }
7470 
7471 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7472 {
7473     unsigned i;
7474 
7475     /* make sure VCEPLL_CTLREQ is deasserted */
7476     WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7477 
7478     mdelay(10);
7479 
7480     /* assert UPLL_CTLREQ */
7481     WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7482 
7483     /* wait for CTLACK and CTLACK2 to get asserted */
7484     for (i = 0; i < 100; ++i) {
7485         uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7486         if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7487             break;
7488         mdelay(10);
7489     }
7490 
7491     /* deassert UPLL_CTLREQ */
7492     WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7493 
7494     if (i == 100) {
7495         DRM_ERROR("Timeout setting UVD clocks!\n");
7496         return -ETIMEDOUT;
7497     }
7498 
7499     return 0;
7500 }
7501 
/**
 * si_set_vce_clocks - program the VCE PLL for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @evclk: requested EVCLK, 0 to leave the PLL bypassed and asleep
 * @ecclk: requested ECCLK, 0 to leave the PLL bypassed and asleep
 *
 * Switches EVCLK/ECCLK to the bypass clock, puts the PLL in bypass mode and,
 * when both clocks are non-zero, reprograms the dividers computed by
 * radeon_uvd_calc_upll_dividers() before switching back to the PLL output.
 * Returns 0 on success, or the error returned by
 * radeon_uvd_calc_upll_dividers() / si_vce_send_vcepll_ctlreq().
 */
int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
{
    unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
    int r;

    /* bypass evclk and ecclk with bclk */
    WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
             EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
             ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));

    /* put PLL in bypass mode */
    WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
             ~VCEPLL_BYPASS_EN_MASK);

    if (!evclk || !ecclk) {
        /* keep the Bypass mode, put PLL to sleep */
        WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
                 ~VCEPLL_SLEEP_MASK);
        return 0;
    }

    r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
                      16384, 0x03FFFFFF, 0, 128, 5,
                      &fb_div, &evclk_div, &ecclk_div);
    if (r)
        return r;

    /* set RESET_ANTI_MUX to 0 */
    WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

    /* set VCO_MODE to 1 */
    WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
             ~VCEPLL_VCO_MODE_MASK);

    /* toggle VCEPLL_SLEEP to 1 then back to 0 */
    WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
             ~VCEPLL_SLEEP_MASK);
    WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);

    /* deassert VCEPLL_RESET */
    WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);

    mdelay(1);

    r = si_vce_send_vcepll_ctlreq(rdev);
    if (r)
        return r;

    /* assert VCEPLL_RESET again */
    WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);

    /* disable spread spectrum. */
    WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

    /* set feedback divider */
    WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);

    /* set ref divider to 0 */
    WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);

    /* set PDIV_A and PDIV_B */
    WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
             VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
             ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));

    /* give the PLL some time to settle */
    mdelay(15);

    /* deassert PLL_RESET */
    WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);

    mdelay(15);

    /* switch from bypass mode to normal mode */
    WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);

    r = si_vce_send_vcepll_ctlreq(rdev);
    if (r)
        return r;

    /* switch EVCLK and ECCLK selection */
    WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
             EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
             ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));

    mdelay(100);

    return 0;
}