Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Copyright 2008 Advanced Micro Devices, Inc.
0003  * Copyright 2008 Red Hat Inc.
0004  * Copyright 2009 Jerome Glisse.
0005  *
0006  * Permission is hereby granted, free of charge, to any person obtaining a
0007  * copy of this software and associated documentation files (the "Software"),
0008  * to deal in the Software without restriction, including without limitation
0009  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
0010  * and/or sell copies of the Software, and to permit persons to whom the
0011  * Software is furnished to do so, subject to the following conditions:
0012  *
0013  * The above copyright notice and this permission notice shall be included in
0014  * all copies or substantial portions of the Software.
0015  *
0016  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0017  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0018  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
0019  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
0020  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
0021  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
0022  * OTHER DEALINGS IN THE SOFTWARE.
0023  *
0024  * Authors: Dave Airlie
0025  *          Alex Deucher
0026  *          Jerome Glisse
0027  */
0028 
0029 #include <linux/firmware.h>
0030 #include <linux/module.h>
0031 #include <linux/pci.h>
0032 #include <linux/seq_file.h>
0033 #include <linux/slab.h>
0034 
0035 #include <drm/drm_device.h>
0036 #include <drm/drm_file.h>
0037 #include <drm/drm_fourcc.h>
0038 #include <drm/drm_framebuffer.h>
0039 #include <drm/drm_vblank.h>
0040 #include <drm/radeon_drm.h>
0041 
0042 #include "atom.h"
0043 #include "r100_reg_safe.h"
0044 #include "r100d.h"
0045 #include "radeon.h"
0046 #include "radeon_asic.h"
0047 #include "radeon_reg.h"
0048 #include "rn50_reg_safe.h"
0049 #include "rs100d.h"
0050 #include "rv200d.h"
0051 #include "rv250d.h"
0052 
0053 /* Firmware Names */
0054 #define FIRMWARE_R100       "radeon/R100_cp.bin"
0055 #define FIRMWARE_R200       "radeon/R200_cp.bin"
0056 #define FIRMWARE_R300       "radeon/R300_cp.bin"
0057 #define FIRMWARE_R420       "radeon/R420_cp.bin"
0058 #define FIRMWARE_RS690      "radeon/RS690_cp.bin"
0059 #define FIRMWARE_RS600      "radeon/RS600_cp.bin"
0060 #define FIRMWARE_R520       "radeon/R520_cp.bin"
0061 
0062 MODULE_FIRMWARE(FIRMWARE_R100);
0063 MODULE_FIRMWARE(FIRMWARE_R200);
0064 MODULE_FIRMWARE(FIRMWARE_R300);
0065 MODULE_FIRMWARE(FIRMWARE_R420);
0066 MODULE_FIRMWARE(FIRMWARE_RS690);
0067 MODULE_FIRMWARE(FIRMWARE_RS600);
0068 MODULE_FIRMWARE(FIRMWARE_R520);
0069 
0070 #include "r100_track.h"
0071 
/* This file gathers functions specific to:
 * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
 * and others in some cases.
 */
0076 
0077 static bool r100_is_in_vblank(struct radeon_device *rdev, int crtc)
0078 {
0079     if (crtc == 0) {
0080         if (RREG32(RADEON_CRTC_STATUS) & RADEON_CRTC_VBLANK_CUR)
0081             return true;
0082         else
0083             return false;
0084     } else {
0085         if (RREG32(RADEON_CRTC2_STATUS) & RADEON_CRTC2_VBLANK_CUR)
0086             return true;
0087         else
0088             return false;
0089     }
0090 }
0091 
0092 static bool r100_is_counter_moving(struct radeon_device *rdev, int crtc)
0093 {
0094     u32 vline1, vline2;
0095 
0096     if (crtc == 0) {
0097         vline1 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
0098         vline2 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
0099     } else {
0100         vline1 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
0101         vline2 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
0102     }
0103     if (vline1 != vline2)
0104         return true;
0105     else
0106         return false;
0107 }
0108 
0109 /**
0110  * r100_wait_for_vblank - vblank wait asic callback.
0111  *
0112  * @rdev: radeon_device pointer
0113  * @crtc: crtc to wait for vblank on
0114  *
0115  * Wait for vblank on the requested crtc (r1xx-r4xx).
0116  */
0117 void r100_wait_for_vblank(struct radeon_device *rdev, int crtc)
0118 {
0119     unsigned i = 0;
0120 
0121     if (crtc >= rdev->num_crtc)
0122         return;
0123 
0124     if (crtc == 0) {
0125         if (!(RREG32(RADEON_CRTC_GEN_CNTL) & RADEON_CRTC_EN))
0126             return;
0127     } else {
0128         if (!(RREG32(RADEON_CRTC2_GEN_CNTL) & RADEON_CRTC2_EN))
0129             return;
0130     }
0131 
0132     /* depending on when we hit vblank, we may be close to active; if so,
0133      * wait for another frame.
0134      */
0135     while (r100_is_in_vblank(rdev, crtc)) {
0136         if (i++ % 100 == 0) {
0137             if (!r100_is_counter_moving(rdev, crtc))
0138                 break;
0139         }
0140     }
0141 
0142     while (!r100_is_in_vblank(rdev, crtc)) {
0143         if (i++ % 100 == 0) {
0144             if (!r100_is_counter_moving(rdev, crtc))
0145                 break;
0146         }
0147     }
0148 }
0149 
0150 /**
0151  * r100_page_flip - pageflip callback.
0152  *
0153  * @rdev: radeon_device pointer
0154  * @crtc_id: crtc to cleanup pageflip on
0155  * @crtc_base: new address of the crtc (GPU MC address)
0156  * @async: asynchronous flip
0157  *
0158  * Does the actual pageflip (r1xx-r4xx).
0159  * During vblank we take the crtc lock and wait for the update_pending
0160  * bit to go high, when it does, we release the lock, and allow the
0161  * double buffered update to take place.
0162  */
0163 void r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base, bool async)
0164 {
0165     struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
0166     uint32_t crtc_pitch, pitch_pixels;
0167     struct drm_framebuffer *fb = radeon_crtc->base.primary->fb;
0168     u32 tmp = ((u32)crtc_base) | RADEON_CRTC_OFFSET__OFFSET_LOCK;
0169     int i;
0170 
0171     /* Lock the graphics update lock */
0172     /* update the scanout addresses */
0173     WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
0174 
0175     /* update pitch */
0176     pitch_pixels = fb->pitches[0] / fb->format->cpp[0];
0177     crtc_pitch = DIV_ROUND_UP(pitch_pixels * fb->format->cpp[0] * 8,
0178                   fb->format->cpp[0] * 8 * 8);
0179     crtc_pitch |= crtc_pitch << 16;
0180     WREG32(RADEON_CRTC_PITCH + radeon_crtc->crtc_offset, crtc_pitch);
0181 
0182     /* Wait for update_pending to go high. */
0183     for (i = 0; i < rdev->usec_timeout; i++) {
0184         if (RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET)
0185             break;
0186         udelay(1);
0187     }
0188     DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
0189 
0190     /* Unlock the lock, so double-buffering can take place inside vblank */
0191     tmp &= ~RADEON_CRTC_OFFSET__OFFSET_LOCK;
0192     WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
0193 
0194 }
0195 
0196 /**
0197  * r100_page_flip_pending - check if page flip is still pending
0198  *
0199  * @rdev: radeon_device pointer
0200  * @crtc_id: crtc to check
0201  *
0202  * Check if the last pagefilp is still pending (r1xx-r4xx).
0203  * Returns the current update pending status.
0204  */
0205 bool r100_page_flip_pending(struct radeon_device *rdev, int crtc_id)
0206 {
0207     struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
0208 
0209     /* Return current update_pending status: */
0210     return !!(RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) &
0211         RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET);
0212 }
0213 
0214 /**
0215  * r100_pm_get_dynpm_state - look up dynpm power state callback.
0216  *
0217  * @rdev: radeon_device pointer
0218  *
0219  * Look up the optimal power state based on the
0220  * current state of the GPU (r1xx-r5xx).
0221  * Used for dynpm only.
0222  */
0223 void r100_pm_get_dynpm_state(struct radeon_device *rdev)
0224 {
0225     int i;
0226     rdev->pm.dynpm_can_upclock = true;
0227     rdev->pm.dynpm_can_downclock = true;
0228 
0229     switch (rdev->pm.dynpm_planned_action) {
0230     case DYNPM_ACTION_MINIMUM:
0231         rdev->pm.requested_power_state_index = 0;
0232         rdev->pm.dynpm_can_downclock = false;
0233         break;
0234     case DYNPM_ACTION_DOWNCLOCK:
0235         if (rdev->pm.current_power_state_index == 0) {
0236             rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
0237             rdev->pm.dynpm_can_downclock = false;
0238         } else {
0239             if (rdev->pm.active_crtc_count > 1) {
0240                 for (i = 0; i < rdev->pm.num_power_states; i++) {
0241                     if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
0242                         continue;
0243                     else if (i >= rdev->pm.current_power_state_index) {
0244                         rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
0245                         break;
0246                     } else {
0247                         rdev->pm.requested_power_state_index = i;
0248                         break;
0249                     }
0250                 }
0251             } else
0252                 rdev->pm.requested_power_state_index =
0253                     rdev->pm.current_power_state_index - 1;
0254         }
0255         /* don't use the power state if crtcs are active and no display flag is set */
0256         if ((rdev->pm.active_crtc_count > 0) &&
0257             (rdev->pm.power_state[rdev->pm.requested_power_state_index].clock_info[0].flags &
0258              RADEON_PM_MODE_NO_DISPLAY)) {
0259             rdev->pm.requested_power_state_index++;
0260         }
0261         break;
0262     case DYNPM_ACTION_UPCLOCK:
0263         if (rdev->pm.current_power_state_index == (rdev->pm.num_power_states - 1)) {
0264             rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
0265             rdev->pm.dynpm_can_upclock = false;
0266         } else {
0267             if (rdev->pm.active_crtc_count > 1) {
0268                 for (i = (rdev->pm.num_power_states - 1); i >= 0; i--) {
0269                     if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
0270                         continue;
0271                     else if (i <= rdev->pm.current_power_state_index) {
0272                         rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
0273                         break;
0274                     } else {
0275                         rdev->pm.requested_power_state_index = i;
0276                         break;
0277                     }
0278                 }
0279             } else
0280                 rdev->pm.requested_power_state_index =
0281                     rdev->pm.current_power_state_index + 1;
0282         }
0283         break;
0284     case DYNPM_ACTION_DEFAULT:
0285         rdev->pm.requested_power_state_index = rdev->pm.default_power_state_index;
0286         rdev->pm.dynpm_can_upclock = false;
0287         break;
0288     case DYNPM_ACTION_NONE:
0289     default:
0290         DRM_ERROR("Requested mode for not defined action\n");
0291         return;
0292     }
0293     /* only one clock mode per power state */
0294     rdev->pm.requested_clock_mode_index = 0;
0295 
0296     DRM_DEBUG_DRIVER("Requested: e: %d m: %d p: %d\n",
0297           rdev->pm.power_state[rdev->pm.requested_power_state_index].
0298           clock_info[rdev->pm.requested_clock_mode_index].sclk,
0299           rdev->pm.power_state[rdev->pm.requested_power_state_index].
0300           clock_info[rdev->pm.requested_clock_mode_index].mclk,
0301           rdev->pm.power_state[rdev->pm.requested_power_state_index].
0302           pcie_lanes);
0303 }
0304 
0305 /**
0306  * r100_pm_init_profile - Initialize power profiles callback.
0307  *
0308  * @rdev: radeon_device pointer
0309  *
0310  * Initialize the power states used in profile mode
0311  * (r1xx-r3xx).
0312  * Used for profile mode only.
0313  */
0314 void r100_pm_init_profile(struct radeon_device *rdev)
0315 {
0316     /* default */
0317     rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_ps_idx = rdev->pm.default_power_state_index;
0318     rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
0319     rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_cm_idx = 0;
0320     rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_cm_idx = 0;
0321     /* low sh */
0322     rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_ps_idx = 0;
0323     rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_ps_idx = 0;
0324     rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_cm_idx = 0;
0325     rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_cm_idx = 0;
0326     /* mid sh */
0327     rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_ps_idx = 0;
0328     rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_ps_idx = 0;
0329     rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_cm_idx = 0;
0330     rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_cm_idx = 0;
0331     /* high sh */
0332     rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_ps_idx = 0;
0333     rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
0334     rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_cm_idx = 0;
0335     rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_cm_idx = 0;
0336     /* low mh */
0337     rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_ps_idx = 0;
0338     rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
0339     rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_cm_idx = 0;
0340     rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_cm_idx = 0;
0341     /* mid mh */
0342     rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_ps_idx = 0;
0343     rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
0344     rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_cm_idx = 0;
0345     rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_cm_idx = 0;
0346     /* high mh */
0347     rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_ps_idx = 0;
0348     rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
0349     rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_cm_idx = 0;
0350     rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_cm_idx = 0;
0351 }
0352 
0353 /**
0354  * r100_pm_misc - set additional pm hw parameters callback.
0355  *
0356  * @rdev: radeon_device pointer
0357  *
0358  * Set non-clock parameters associated with a power state
0359  * (voltage, pcie lanes, etc.) (r1xx-r4xx).
0360  */
0361 void r100_pm_misc(struct radeon_device *rdev)
0362 {
0363     int requested_index = rdev->pm.requested_power_state_index;
0364     struct radeon_power_state *ps = &rdev->pm.power_state[requested_index];
0365     struct radeon_voltage *voltage = &ps->clock_info[0].voltage;
0366     u32 tmp, sclk_cntl, sclk_cntl2, sclk_more_cntl;
0367 
0368     if ((voltage->type == VOLTAGE_GPIO) && (voltage->gpio.valid)) {
0369         if (ps->misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_SUPPORT) {
0370             tmp = RREG32(voltage->gpio.reg);
0371             if (voltage->active_high)
0372                 tmp |= voltage->gpio.mask;
0373             else
0374                 tmp &= ~(voltage->gpio.mask);
0375             WREG32(voltage->gpio.reg, tmp);
0376             if (voltage->delay)
0377                 udelay(voltage->delay);
0378         } else {
0379             tmp = RREG32(voltage->gpio.reg);
0380             if (voltage->active_high)
0381                 tmp &= ~voltage->gpio.mask;
0382             else
0383                 tmp |= voltage->gpio.mask;
0384             WREG32(voltage->gpio.reg, tmp);
0385             if (voltage->delay)
0386                 udelay(voltage->delay);
0387         }
0388     }
0389 
0390     sclk_cntl = RREG32_PLL(SCLK_CNTL);
0391     sclk_cntl2 = RREG32_PLL(SCLK_CNTL2);
0392     sclk_cntl2 &= ~REDUCED_SPEED_SCLK_SEL(3);
0393     sclk_more_cntl = RREG32_PLL(SCLK_MORE_CNTL);
0394     sclk_more_cntl &= ~VOLTAGE_DELAY_SEL(3);
0395     if (ps->misc & ATOM_PM_MISCINFO_ASIC_REDUCED_SPEED_SCLK_EN) {
0396         sclk_more_cntl |= REDUCED_SPEED_SCLK_EN;
0397         if (ps->misc & ATOM_PM_MISCINFO_DYN_CLK_3D_IDLE)
0398             sclk_cntl2 |= REDUCED_SPEED_SCLK_MODE;
0399         else
0400             sclk_cntl2 &= ~REDUCED_SPEED_SCLK_MODE;
0401         if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_2)
0402             sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(0);
0403         else if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_4)
0404             sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(2);
0405     } else
0406         sclk_more_cntl &= ~REDUCED_SPEED_SCLK_EN;
0407 
0408     if (ps->misc & ATOM_PM_MISCINFO_ASIC_DYNAMIC_VOLTAGE_EN) {
0409         sclk_more_cntl |= IO_CG_VOLTAGE_DROP;
0410         if (voltage->delay) {
0411             sclk_more_cntl |= VOLTAGE_DROP_SYNC;
0412             switch (voltage->delay) {
0413             case 33:
0414                 sclk_more_cntl |= VOLTAGE_DELAY_SEL(0);
0415                 break;
0416             case 66:
0417                 sclk_more_cntl |= VOLTAGE_DELAY_SEL(1);
0418                 break;
0419             case 99:
0420                 sclk_more_cntl |= VOLTAGE_DELAY_SEL(2);
0421                 break;
0422             case 132:
0423                 sclk_more_cntl |= VOLTAGE_DELAY_SEL(3);
0424                 break;
0425             }
0426         } else
0427             sclk_more_cntl &= ~VOLTAGE_DROP_SYNC;
0428     } else
0429         sclk_more_cntl &= ~IO_CG_VOLTAGE_DROP;
0430 
0431     if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_HDP_BLOCK_EN)
0432         sclk_cntl &= ~FORCE_HDP;
0433     else
0434         sclk_cntl |= FORCE_HDP;
0435 
0436     WREG32_PLL(SCLK_CNTL, sclk_cntl);
0437     WREG32_PLL(SCLK_CNTL2, sclk_cntl2);
0438     WREG32_PLL(SCLK_MORE_CNTL, sclk_more_cntl);
0439 
0440     /* set pcie lanes */
0441     if ((rdev->flags & RADEON_IS_PCIE) &&
0442         !(rdev->flags & RADEON_IS_IGP) &&
0443         rdev->asic->pm.set_pcie_lanes &&
0444         (ps->pcie_lanes !=
0445          rdev->pm.power_state[rdev->pm.current_power_state_index].pcie_lanes)) {
0446         radeon_set_pcie_lanes(rdev,
0447                       ps->pcie_lanes);
0448         DRM_DEBUG_DRIVER("Setting: p: %d\n", ps->pcie_lanes);
0449     }
0450 }
0451 
0452 /**
0453  * r100_pm_prepare - pre-power state change callback.
0454  *
0455  * @rdev: radeon_device pointer
0456  *
0457  * Prepare for a power state change (r1xx-r4xx).
0458  */
0459 void r100_pm_prepare(struct radeon_device *rdev)
0460 {
0461     struct drm_device *ddev = rdev->ddev;
0462     struct drm_crtc *crtc;
0463     struct radeon_crtc *radeon_crtc;
0464     u32 tmp;
0465 
0466     /* disable any active CRTCs */
0467     list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
0468         radeon_crtc = to_radeon_crtc(crtc);
0469         if (radeon_crtc->enabled) {
0470             if (radeon_crtc->crtc_id) {
0471                 tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
0472                 tmp |= RADEON_CRTC2_DISP_REQ_EN_B;
0473                 WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
0474             } else {
0475                 tmp = RREG32(RADEON_CRTC_GEN_CNTL);
0476                 tmp |= RADEON_CRTC_DISP_REQ_EN_B;
0477                 WREG32(RADEON_CRTC_GEN_CNTL, tmp);
0478             }
0479         }
0480     }
0481 }
0482 
0483 /**
0484  * r100_pm_finish - post-power state change callback.
0485  *
0486  * @rdev: radeon_device pointer
0487  *
0488  * Clean up after a power state change (r1xx-r4xx).
0489  */
0490 void r100_pm_finish(struct radeon_device *rdev)
0491 {
0492     struct drm_device *ddev = rdev->ddev;
0493     struct drm_crtc *crtc;
0494     struct radeon_crtc *radeon_crtc;
0495     u32 tmp;
0496 
0497     /* enable any active CRTCs */
0498     list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
0499         radeon_crtc = to_radeon_crtc(crtc);
0500         if (radeon_crtc->enabled) {
0501             if (radeon_crtc->crtc_id) {
0502                 tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
0503                 tmp &= ~RADEON_CRTC2_DISP_REQ_EN_B;
0504                 WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
0505             } else {
0506                 tmp = RREG32(RADEON_CRTC_GEN_CNTL);
0507                 tmp &= ~RADEON_CRTC_DISP_REQ_EN_B;
0508                 WREG32(RADEON_CRTC_GEN_CNTL, tmp);
0509             }
0510         }
0511     }
0512 }
0513 
0514 /**
0515  * r100_gui_idle - gui idle callback.
0516  *
0517  * @rdev: radeon_device pointer
0518  *
0519  * Check of the GUI (2D/3D engines) are idle (r1xx-r5xx).
0520  * Returns true if idle, false if not.
0521  */
0522 bool r100_gui_idle(struct radeon_device *rdev)
0523 {
0524     if (RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)
0525         return false;
0526     else
0527         return true;
0528 }
0529 
0530 /* hpd for digital panel detect/disconnect */
0531 /**
0532  * r100_hpd_sense - hpd sense callback.
0533  *
0534  * @rdev: radeon_device pointer
0535  * @hpd: hpd (hotplug detect) pin
0536  *
0537  * Checks if a digital monitor is connected (r1xx-r4xx).
0538  * Returns true if connected, false if not connected.
0539  */
0540 bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd)
0541 {
0542     bool connected = false;
0543 
0544     switch (hpd) {
0545     case RADEON_HPD_1:
0546         if (RREG32(RADEON_FP_GEN_CNTL) & RADEON_FP_DETECT_SENSE)
0547             connected = true;
0548         break;
0549     case RADEON_HPD_2:
0550         if (RREG32(RADEON_FP2_GEN_CNTL) & RADEON_FP2_DETECT_SENSE)
0551             connected = true;
0552         break;
0553     default:
0554         break;
0555     }
0556     return connected;
0557 }
0558 
0559 /**
0560  * r100_hpd_set_polarity - hpd set polarity callback.
0561  *
0562  * @rdev: radeon_device pointer
0563  * @hpd: hpd (hotplug detect) pin
0564  *
0565  * Set the polarity of the hpd pin (r1xx-r4xx).
0566  */
0567 void r100_hpd_set_polarity(struct radeon_device *rdev,
0568                enum radeon_hpd_id hpd)
0569 {
0570     u32 tmp;
0571     bool connected = r100_hpd_sense(rdev, hpd);
0572 
0573     switch (hpd) {
0574     case RADEON_HPD_1:
0575         tmp = RREG32(RADEON_FP_GEN_CNTL);
0576         if (connected)
0577             tmp &= ~RADEON_FP_DETECT_INT_POL;
0578         else
0579             tmp |= RADEON_FP_DETECT_INT_POL;
0580         WREG32(RADEON_FP_GEN_CNTL, tmp);
0581         break;
0582     case RADEON_HPD_2:
0583         tmp = RREG32(RADEON_FP2_GEN_CNTL);
0584         if (connected)
0585             tmp &= ~RADEON_FP2_DETECT_INT_POL;
0586         else
0587             tmp |= RADEON_FP2_DETECT_INT_POL;
0588         WREG32(RADEON_FP2_GEN_CNTL, tmp);
0589         break;
0590     default:
0591         break;
0592     }
0593 }
0594 
0595 /**
0596  * r100_hpd_init - hpd setup callback.
0597  *
0598  * @rdev: radeon_device pointer
0599  *
0600  * Setup the hpd pins used by the card (r1xx-r4xx).
0601  * Set the polarity, and enable the hpd interrupts.
0602  */
0603 void r100_hpd_init(struct radeon_device *rdev)
0604 {
0605     struct drm_device *dev = rdev->ddev;
0606     struct drm_connector *connector;
0607     unsigned enable = 0;
0608 
0609     list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
0610         struct radeon_connector *radeon_connector = to_radeon_connector(connector);
0611         if (radeon_connector->hpd.hpd != RADEON_HPD_NONE)
0612             enable |= 1 << radeon_connector->hpd.hpd;
0613         radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
0614     }
0615     radeon_irq_kms_enable_hpd(rdev, enable);
0616 }
0617 
0618 /**
0619  * r100_hpd_fini - hpd tear down callback.
0620  *
0621  * @rdev: radeon_device pointer
0622  *
0623  * Tear down the hpd pins used by the card (r1xx-r4xx).
0624  * Disable the hpd interrupts.
0625  */
0626 void r100_hpd_fini(struct radeon_device *rdev)
0627 {
0628     struct drm_device *dev = rdev->ddev;
0629     struct drm_connector *connector;
0630     unsigned disable = 0;
0631 
0632     list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
0633         struct radeon_connector *radeon_connector = to_radeon_connector(connector);
0634         if (radeon_connector->hpd.hpd != RADEON_HPD_NONE)
0635             disable |= 1 << radeon_connector->hpd.hpd;
0636     }
0637     radeon_irq_kms_disable_hpd(rdev, disable);
0638 }
0639 
0640 /*
0641  * PCI GART
0642  */
/*
 * PCI GART TLB flush callback.  Intentionally does nothing at present.
 */
void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
{
    /*
     * TODO: is there anything we can do here?
     * The hardware appears to cache only a single entry.  That entry
     * should be discarded, otherwise the first GPU GART read that hits
     * it could resolve to the wrong address.
     */
}
0650 
0651 int r100_pci_gart_init(struct radeon_device *rdev)
0652 {
0653     int r;
0654 
0655     if (rdev->gart.ptr) {
0656         WARN(1, "R100 PCI GART already initialized\n");
0657         return 0;
0658     }
0659     /* Initialize common gart structure */
0660     r = radeon_gart_init(rdev);
0661     if (r)
0662         return r;
0663     rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
0664     rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush;
0665     rdev->asic->gart.get_page_entry = &r100_pci_gart_get_page_entry;
0666     rdev->asic->gart.set_page = &r100_pci_gart_set_page;
0667     return radeon_gart_table_ram_alloc(rdev);
0668 }
0669 
0670 int r100_pci_gart_enable(struct radeon_device *rdev)
0671 {
0672     uint32_t tmp;
0673 
0674     /* discard memory request outside of configured range */
0675     tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
0676     WREG32(RADEON_AIC_CNTL, tmp);
0677     /* set address range for PCI address translate */
0678     WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_start);
0679     WREG32(RADEON_AIC_HI_ADDR, rdev->mc.gtt_end);
0680     /* set PCI GART page-table base address */
0681     WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
0682     tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
0683     WREG32(RADEON_AIC_CNTL, tmp);
0684     r100_pci_gart_tlb_flush(rdev);
0685     DRM_INFO("PCI GART of %uM enabled (table at 0x%016llX).\n",
0686          (unsigned)(rdev->mc.gtt_size >> 20),
0687          (unsigned long long)rdev->gart.table_addr);
0688     rdev->gart.ready = true;
0689     return 0;
0690 }
0691 
0692 void r100_pci_gart_disable(struct radeon_device *rdev)
0693 {
0694     uint32_t tmp;
0695 
0696     /* discard memory request outside of configured range */
0697     tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
0698     WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
0699     WREG32(RADEON_AIC_LO_ADDR, 0);
0700     WREG32(RADEON_AIC_HI_ADDR, 0);
0701 }
0702 
/*
 * Build the PCI GART page-table entry for a page.
 *
 * @addr:  address of the page
 * @flags: page flags; not used here
 *
 * Returns @addr unchanged.
 */
uint64_t r100_pci_gart_get_page_entry(uint64_t addr, uint32_t flags)
{
    uint64_t entry = addr;

    return entry;
}
0707 
0708 void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
0709                 uint64_t entry)
0710 {
0711     u32 *gtt = rdev->gart.ptr;
0712     gtt[i] = cpu_to_le32(lower_32_bits(entry));
0713 }
0714 
/*
 * Tear down the PCI GART: release the common gart structure, disable
 * address translation in the hardware, then free the page table that was
 * allocated in system RAM.  The three steps run in exactly this order.
 */
void r100_pci_gart_fini(struct radeon_device *rdev)
{
    radeon_gart_fini(rdev);
    r100_pci_gart_disable(rdev);
    radeon_gart_table_ram_free(rdev);
}
0721 
0722 int r100_irq_set(struct radeon_device *rdev)
0723 {
0724     uint32_t tmp = 0;
0725 
0726     if (!rdev->irq.installed) {
0727         WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
0728         WREG32(R_000040_GEN_INT_CNTL, 0);
0729         return -EINVAL;
0730     }
0731     if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
0732         tmp |= RADEON_SW_INT_ENABLE;
0733     }
0734     if (rdev->irq.crtc_vblank_int[0] ||
0735         atomic_read(&rdev->irq.pflip[0])) {
0736         tmp |= RADEON_CRTC_VBLANK_MASK;
0737     }
0738     if (rdev->irq.crtc_vblank_int[1] ||
0739         atomic_read(&rdev->irq.pflip[1])) {
0740         tmp |= RADEON_CRTC2_VBLANK_MASK;
0741     }
0742     if (rdev->irq.hpd[0]) {
0743         tmp |= RADEON_FP_DETECT_MASK;
0744     }
0745     if (rdev->irq.hpd[1]) {
0746         tmp |= RADEON_FP2_DETECT_MASK;
0747     }
0748     WREG32(RADEON_GEN_INT_CNTL, tmp);
0749 
0750     /* read back to post the write */
0751     RREG32(RADEON_GEN_INT_CNTL);
0752 
0753     return 0;
0754 }
0755 
0756 void r100_irq_disable(struct radeon_device *rdev)
0757 {
0758     u32 tmp;
0759 
0760     WREG32(R_000040_GEN_INT_CNTL, 0);
0761     /* Wait and acknowledge irq */
0762     mdelay(1);
0763     tmp = RREG32(R_000044_GEN_INT_STATUS);
0764     WREG32(R_000044_GEN_INT_STATUS, tmp);
0765 }
0766 
0767 static uint32_t r100_irq_ack(struct radeon_device *rdev)
0768 {
0769     uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
0770     uint32_t irq_mask = RADEON_SW_INT_TEST |
0771         RADEON_CRTC_VBLANK_STAT | RADEON_CRTC2_VBLANK_STAT |
0772         RADEON_FP_DETECT_STAT | RADEON_FP2_DETECT_STAT;
0773 
0774     if (irqs) {
0775         WREG32(RADEON_GEN_INT_STATUS, irqs);
0776     }
0777     return irqs & irq_mask;
0778 }
0779 
0780 int r100_irq_process(struct radeon_device *rdev)
0781 {
0782     uint32_t status, msi_rearm;
0783     bool queue_hotplug = false;
0784 
0785     status = r100_irq_ack(rdev);
0786     if (!status) {
0787         return IRQ_NONE;
0788     }
0789     if (rdev->shutdown) {
0790         return IRQ_NONE;
0791     }
0792     while (status) {
0793         /* SW interrupt */
0794         if (status & RADEON_SW_INT_TEST) {
0795             radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
0796         }
0797         /* Vertical blank interrupts */
0798         if (status & RADEON_CRTC_VBLANK_STAT) {
0799             if (rdev->irq.crtc_vblank_int[0]) {
0800                 drm_handle_vblank(rdev->ddev, 0);
0801                 rdev->pm.vblank_sync = true;
0802                 wake_up(&rdev->irq.vblank_queue);
0803             }
0804             if (atomic_read(&rdev->irq.pflip[0]))
0805                 radeon_crtc_handle_vblank(rdev, 0);
0806         }
0807         if (status & RADEON_CRTC2_VBLANK_STAT) {
0808             if (rdev->irq.crtc_vblank_int[1]) {
0809                 drm_handle_vblank(rdev->ddev, 1);
0810                 rdev->pm.vblank_sync = true;
0811                 wake_up(&rdev->irq.vblank_queue);
0812             }
0813             if (atomic_read(&rdev->irq.pflip[1]))
0814                 radeon_crtc_handle_vblank(rdev, 1);
0815         }
0816         if (status & RADEON_FP_DETECT_STAT) {
0817             queue_hotplug = true;
0818             DRM_DEBUG("HPD1\n");
0819         }
0820         if (status & RADEON_FP2_DETECT_STAT) {
0821             queue_hotplug = true;
0822             DRM_DEBUG("HPD2\n");
0823         }
0824         status = r100_irq_ack(rdev);
0825     }
0826     if (queue_hotplug)
0827         schedule_delayed_work(&rdev->hotplug_work, 0);
0828     if (rdev->msi_enabled) {
0829         switch (rdev->family) {
0830         case CHIP_RS400:
0831         case CHIP_RS480:
0832             msi_rearm = RREG32(RADEON_AIC_CNTL) & ~RS400_MSI_REARM;
0833             WREG32(RADEON_AIC_CNTL, msi_rearm);
0834             WREG32(RADEON_AIC_CNTL, msi_rearm | RS400_MSI_REARM);
0835             break;
0836         default:
0837             WREG32(RADEON_MSI_REARM_EN, RV370_MSI_REARM_EN);
0838             break;
0839         }
0840     }
0841     return IRQ_HANDLED;
0842 }
0843 
0844 u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc)
0845 {
0846     if (crtc == 0)
0847         return RREG32(RADEON_CRTC_CRNT_FRAME);
0848     else
0849         return RREG32(RADEON_CRTC2_CRNT_FRAME);
0850 }
0851 
0852 /**
0853  * r100_ring_hdp_flush - flush Host Data Path via the ring buffer
0854  * @rdev: radeon device structure
0855  * @ring: ring buffer struct for emitting packets
0856  */
0857 static void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring)
0858 {
0859     radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
0860     radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
0861                 RADEON_HDP_READ_BUFFER_INVALIDATE);
0862     radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
0863     radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
0864 }
0865 
0866 /* Who ever call radeon_fence_emit should call ring_lock and ask
0867  * for enough space (today caller are ib schedule and buffer move) */
0868 void r100_fence_ring_emit(struct radeon_device *rdev,
0869               struct radeon_fence *fence)
0870 {
0871     struct radeon_ring *ring = &rdev->ring[fence->ring];
0872 
0873     /* We have to make sure that caches are flushed before
0874      * CPU might read something from VRAM. */
0875     radeon_ring_write(ring, PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0));
0876     radeon_ring_write(ring, RADEON_RB3D_DC_FLUSH_ALL);
0877     radeon_ring_write(ring, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));
0878     radeon_ring_write(ring, RADEON_RB3D_ZC_FLUSH_ALL);
0879     /* Wait until IDLE & CLEAN */
0880     radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
0881     radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
0882     r100_ring_hdp_flush(rdev, ring);
0883     /* Emit fence sequence & fire IRQ */
0884     radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
0885     radeon_ring_write(ring, fence->seq);
0886     radeon_ring_write(ring, PACKET0(RADEON_GEN_INT_STATUS, 0));
0887     radeon_ring_write(ring, RADEON_SW_INT_FIRE);
0888 }
0889 
0890 bool r100_semaphore_ring_emit(struct radeon_device *rdev,
0891                   struct radeon_ring *ring,
0892                   struct radeon_semaphore *semaphore,
0893                   bool emit_wait)
0894 {
0895     /* Unused on older asics, since we don't have semaphores or multiple rings */
0896     BUG();
0897     return false;
0898 }
0899 
0900 struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
0901                     uint64_t src_offset,
0902                     uint64_t dst_offset,
0903                     unsigned num_gpu_pages,
0904                     struct dma_resv *resv)
0905 {
0906     struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
0907     struct radeon_fence *fence;
0908     uint32_t cur_pages;
0909     uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
0910     uint32_t pitch;
0911     uint32_t stride_pixels;
0912     unsigned ndw;
0913     int num_loops;
0914     int r = 0;
0915 
0916     /* radeon limited to 16k stride */
0917     stride_bytes &= 0x3fff;
0918     /* radeon pitch is /64 */
0919     pitch = stride_bytes / 64;
0920     stride_pixels = stride_bytes / 4;
0921     num_loops = DIV_ROUND_UP(num_gpu_pages, 8191);
0922 
0923     /* Ask for enough room for blit + flush + fence */
0924     ndw = 64 + (10 * num_loops);
0925     r = radeon_ring_lock(rdev, ring, ndw);
0926     if (r) {
0927         DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
0928         return ERR_PTR(-EINVAL);
0929     }
0930     while (num_gpu_pages > 0) {
0931         cur_pages = num_gpu_pages;
0932         if (cur_pages > 8191) {
0933             cur_pages = 8191;
0934         }
0935         num_gpu_pages -= cur_pages;
0936 
0937         /* pages are in Y direction - height
0938            page width in X direction - width */
0939         radeon_ring_write(ring, PACKET3(PACKET3_BITBLT_MULTI, 8));
0940         radeon_ring_write(ring,
0941                   RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
0942                   RADEON_GMC_DST_PITCH_OFFSET_CNTL |
0943                   RADEON_GMC_SRC_CLIPPING |
0944                   RADEON_GMC_DST_CLIPPING |
0945                   RADEON_GMC_BRUSH_NONE |
0946                   (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
0947                   RADEON_GMC_SRC_DATATYPE_COLOR |
0948                   RADEON_ROP3_S |
0949                   RADEON_DP_SRC_SOURCE_MEMORY |
0950                   RADEON_GMC_CLR_CMP_CNTL_DIS |
0951                   RADEON_GMC_WR_MSK_DIS);
0952         radeon_ring_write(ring, (pitch << 22) | (src_offset >> 10));
0953         radeon_ring_write(ring, (pitch << 22) | (dst_offset >> 10));
0954         radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
0955         radeon_ring_write(ring, 0);
0956         radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
0957         radeon_ring_write(ring, num_gpu_pages);
0958         radeon_ring_write(ring, num_gpu_pages);
0959         radeon_ring_write(ring, cur_pages | (stride_pixels << 16));
0960     }
0961     radeon_ring_write(ring, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
0962     radeon_ring_write(ring, RADEON_RB2D_DC_FLUSH_ALL);
0963     radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
0964     radeon_ring_write(ring,
0965               RADEON_WAIT_2D_IDLECLEAN |
0966               RADEON_WAIT_HOST_IDLECLEAN |
0967               RADEON_WAIT_DMA_GUI_IDLE);
0968     r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
0969     if (r) {
0970         radeon_ring_unlock_undo(rdev, ring);
0971         return ERR_PTR(r);
0972     }
0973     radeon_ring_unlock_commit(rdev, ring, false);
0974     return fence;
0975 }
0976 
0977 static int r100_cp_wait_for_idle(struct radeon_device *rdev)
0978 {
0979     unsigned i;
0980     u32 tmp;
0981 
0982     for (i = 0; i < rdev->usec_timeout; i++) {
0983         tmp = RREG32(R_000E40_RBBM_STATUS);
0984         if (!G_000E40_CP_CMDSTRM_BUSY(tmp)) {
0985             return 0;
0986         }
0987         udelay(1);
0988     }
0989     return -1;
0990 }
0991 
0992 void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
0993 {
0994     int r;
0995 
0996     r = radeon_ring_lock(rdev, ring, 2);
0997     if (r) {
0998         return;
0999     }
1000     radeon_ring_write(ring, PACKET0(RADEON_ISYNC_CNTL, 0));
1001     radeon_ring_write(ring,
1002               RADEON_ISYNC_ANY2D_IDLE3D |
1003               RADEON_ISYNC_ANY3D_IDLE2D |
1004               RADEON_ISYNC_WAIT_IDLEGUI |
1005               RADEON_ISYNC_CPSCRATCH_IDLEGUI);
1006     radeon_ring_unlock_commit(rdev, ring, false);
1007 }
1008 
1009 
1010 /* Load the microcode for the CP */
1011 static int r100_cp_init_microcode(struct radeon_device *rdev)
1012 {
1013     const char *fw_name = NULL;
1014     int err;
1015 
1016     DRM_DEBUG_KMS("\n");
1017 
1018     if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
1019         (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
1020         (rdev->family == CHIP_RS200)) {
1021         DRM_INFO("Loading R100 Microcode\n");
1022         fw_name = FIRMWARE_R100;
1023     } else if ((rdev->family == CHIP_R200) ||
1024            (rdev->family == CHIP_RV250) ||
1025            (rdev->family == CHIP_RV280) ||
1026            (rdev->family == CHIP_RS300)) {
1027         DRM_INFO("Loading R200 Microcode\n");
1028         fw_name = FIRMWARE_R200;
1029     } else if ((rdev->family == CHIP_R300) ||
1030            (rdev->family == CHIP_R350) ||
1031            (rdev->family == CHIP_RV350) ||
1032            (rdev->family == CHIP_RV380) ||
1033            (rdev->family == CHIP_RS400) ||
1034            (rdev->family == CHIP_RS480)) {
1035         DRM_INFO("Loading R300 Microcode\n");
1036         fw_name = FIRMWARE_R300;
1037     } else if ((rdev->family == CHIP_R420) ||
1038            (rdev->family == CHIP_R423) ||
1039            (rdev->family == CHIP_RV410)) {
1040         DRM_INFO("Loading R400 Microcode\n");
1041         fw_name = FIRMWARE_R420;
1042     } else if ((rdev->family == CHIP_RS690) ||
1043            (rdev->family == CHIP_RS740)) {
1044         DRM_INFO("Loading RS690/RS740 Microcode\n");
1045         fw_name = FIRMWARE_RS690;
1046     } else if (rdev->family == CHIP_RS600) {
1047         DRM_INFO("Loading RS600 Microcode\n");
1048         fw_name = FIRMWARE_RS600;
1049     } else if ((rdev->family == CHIP_RV515) ||
1050            (rdev->family == CHIP_R520) ||
1051            (rdev->family == CHIP_RV530) ||
1052            (rdev->family == CHIP_R580) ||
1053            (rdev->family == CHIP_RV560) ||
1054            (rdev->family == CHIP_RV570)) {
1055         DRM_INFO("Loading R500 Microcode\n");
1056         fw_name = FIRMWARE_R520;
1057     }
1058 
1059     err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1060     if (err) {
1061         pr_err("radeon_cp: Failed to load firmware \"%s\"\n", fw_name);
1062     } else if (rdev->me_fw->size % 8) {
1063         pr_err("radeon_cp: Bogus length %zu in firmware \"%s\"\n",
1064                rdev->me_fw->size, fw_name);
1065         err = -EINVAL;
1066         release_firmware(rdev->me_fw);
1067         rdev->me_fw = NULL;
1068     }
1069     return err;
1070 }
1071 
1072 u32 r100_gfx_get_rptr(struct radeon_device *rdev,
1073               struct radeon_ring *ring)
1074 {
1075     u32 rptr;
1076 
1077     if (rdev->wb.enabled)
1078         rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
1079     else
1080         rptr = RREG32(RADEON_CP_RB_RPTR);
1081 
1082     return rptr;
1083 }
1084 
1085 u32 r100_gfx_get_wptr(struct radeon_device *rdev,
1086               struct radeon_ring *ring)
1087 {
1088     return RREG32(RADEON_CP_RB_WPTR);
1089 }
1090 
1091 void r100_gfx_set_wptr(struct radeon_device *rdev,
1092                struct radeon_ring *ring)
1093 {
1094     WREG32(RADEON_CP_RB_WPTR, ring->wptr);
1095     (void)RREG32(RADEON_CP_RB_WPTR);
1096 }
1097 
1098 static void r100_cp_load_microcode(struct radeon_device *rdev)
1099 {
1100     const __be32 *fw_data;
1101     int i, size;
1102 
1103     if (r100_gui_wait_for_idle(rdev)) {
1104         pr_warn("Failed to wait GUI idle while programming pipes. Bad things might happen.\n");
1105     }
1106 
1107     if (rdev->me_fw) {
1108         size = rdev->me_fw->size / 4;
1109         fw_data = (const __be32 *)&rdev->me_fw->data[0];
1110         WREG32(RADEON_CP_ME_RAM_ADDR, 0);
1111         for (i = 0; i < size; i += 2) {
1112             WREG32(RADEON_CP_ME_RAM_DATAH,
1113                    be32_to_cpup(&fw_data[i]));
1114             WREG32(RADEON_CP_ME_RAM_DATAL,
1115                    be32_to_cpup(&fw_data[i + 1]));
1116         }
1117     }
1118 }
1119 
1120 int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
1121 {
1122     struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1123     unsigned rb_bufsz;
1124     unsigned rb_blksz;
1125     unsigned max_fetch;
1126     unsigned pre_write_timer;
1127     unsigned pre_write_limit;
1128     unsigned indirect2_start;
1129     unsigned indirect1_start;
1130     uint32_t tmp;
1131     int r;
1132 
1133     r100_debugfs_cp_init(rdev);
1134     if (!rdev->me_fw) {
1135         r = r100_cp_init_microcode(rdev);
1136         if (r) {
1137             DRM_ERROR("Failed to load firmware!\n");
1138             return r;
1139         }
1140     }
1141 
1142     /* Align ring size */
1143     rb_bufsz = order_base_2(ring_size / 8);
1144     ring_size = (1 << (rb_bufsz + 1)) * 4;
1145     r100_cp_load_microcode(rdev);
1146     r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET,
1147                  RADEON_CP_PACKET2);
1148     if (r) {
1149         return r;
1150     }
1151     /* Each time the cp read 1024 bytes (16 dword/quadword) update
1152      * the rptr copy in system ram */
1153     rb_blksz = 9;
1154     /* cp will read 128bytes at a time (4 dwords) */
1155     max_fetch = 1;
1156     ring->align_mask = 16 - 1;
1157     /* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */
1158     pre_write_timer = 64;
1159     /* Force CP_RB_WPTR write if written more than one time before the
1160      * delay expire
1161      */
1162     pre_write_limit = 0;
1163     /* Setup the cp cache like this (cache size is 96 dwords) :
1164      *  RING        0  to 15
1165      *  INDIRECT1   16 to 79
1166      *  INDIRECT2   80 to 95
1167      * So ring cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
1168      *    indirect1 cache size is 64dwords (> (2 * max_fetch = 2 * 4dwords))
1169      *    indirect2 cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
1170      * Idea being that most of the gpu cmd will be through indirect1 buffer
1171      * so it gets the bigger cache.
1172      */
1173     indirect2_start = 80;
1174     indirect1_start = 16;
1175     /* cp setup */
1176     WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
1177     tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
1178            REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
1179            REG_SET(RADEON_MAX_FETCH, max_fetch));
1180 #ifdef __BIG_ENDIAN
1181     tmp |= RADEON_BUF_SWAP_32BIT;
1182 #endif
1183     WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_NO_UPDATE);
1184 
1185     /* Set ring address */
1186     DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)ring->gpu_addr);
1187     WREG32(RADEON_CP_RB_BASE, ring->gpu_addr);
1188     /* Force read & write ptr to 0 */
1189     WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE);
1190     WREG32(RADEON_CP_RB_RPTR_WR, 0);
1191     ring->wptr = 0;
1192     WREG32(RADEON_CP_RB_WPTR, ring->wptr);
1193 
1194     /* set the wb address whether it's enabled or not */
1195     WREG32(R_00070C_CP_RB_RPTR_ADDR,
1196         S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) >> 2));
1197     WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET);
1198 
1199     if (rdev->wb.enabled)
1200         WREG32(R_000770_SCRATCH_UMSK, 0xff);
1201     else {
1202         tmp |= RADEON_RB_NO_UPDATE;
1203         WREG32(R_000770_SCRATCH_UMSK, 0);
1204     }
1205 
1206     WREG32(RADEON_CP_RB_CNTL, tmp);
1207     udelay(10);
1208     /* Set cp mode to bus mastering & enable cp*/
1209     WREG32(RADEON_CP_CSQ_MODE,
1210            REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
1211            REG_SET(RADEON_INDIRECT1_START, indirect1_start));
1212     WREG32(RADEON_CP_RB_WPTR_DELAY, 0);
1213     WREG32(RADEON_CP_CSQ_MODE, 0x00004D4D);
1214     WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);
1215 
1216     /* at this point everything should be setup correctly to enable master */
1217     pci_set_master(rdev->pdev);
1218 
1219     radeon_ring_start(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1220     r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring);
1221     if (r) {
1222         DRM_ERROR("radeon: cp isn't working (%d).\n", r);
1223         return r;
1224     }
1225     ring->ready = true;
1226     radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
1227 
1228     if (!ring->rptr_save_reg /* not resuming from suspend */
1229         && radeon_ring_supports_scratch_reg(rdev, ring)) {
1230         r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
1231         if (r) {
1232             DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
1233             ring->rptr_save_reg = 0;
1234         }
1235     }
1236     return 0;
1237 }
1238 
1239 void r100_cp_fini(struct radeon_device *rdev)
1240 {
1241     if (r100_cp_wait_for_idle(rdev)) {
1242         DRM_ERROR("Wait for CP idle timeout, shutting down CP.\n");
1243     }
1244     /* Disable ring */
1245     r100_cp_disable(rdev);
1246     radeon_scratch_free(rdev, rdev->ring[RADEON_RING_TYPE_GFX_INDEX].rptr_save_reg);
1247     radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1248     DRM_INFO("radeon: cp finalized\n");
1249 }
1250 
1251 void r100_cp_disable(struct radeon_device *rdev)
1252 {
1253     /* Disable ring */
1254     radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1255     rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1256     WREG32(RADEON_CP_CSQ_MODE, 0);
1257     WREG32(RADEON_CP_CSQ_CNTL, 0);
1258     WREG32(R_000770_SCRATCH_UMSK, 0);
1259     if (r100_gui_wait_for_idle(rdev)) {
1260         pr_warn("Failed to wait GUI idle while programming pipes. Bad things might happen.\n");
1261     }
1262 }
1263 
1264 /*
1265  * CS functions
1266  */
1267 int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
1268                 struct radeon_cs_packet *pkt,
1269                 unsigned idx,
1270                 unsigned reg)
1271 {
1272     int r;
1273     u32 tile_flags = 0;
1274     u32 tmp;
1275     struct radeon_bo_list *reloc;
1276     u32 value;
1277 
1278     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1279     if (r) {
1280         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1281               idx, reg);
1282         radeon_cs_dump_packet(p, pkt);
1283         return r;
1284     }
1285 
1286     value = radeon_get_ib_value(p, idx);
1287     tmp = value & 0x003fffff;
1288     tmp += (((u32)reloc->gpu_offset) >> 10);
1289 
1290     if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1291         if (reloc->tiling_flags & RADEON_TILING_MACRO)
1292             tile_flags |= RADEON_DST_TILE_MACRO;
1293         if (reloc->tiling_flags & RADEON_TILING_MICRO) {
1294             if (reg == RADEON_SRC_PITCH_OFFSET) {
1295                 DRM_ERROR("Cannot src blit from microtiled surface\n");
1296                 radeon_cs_dump_packet(p, pkt);
1297                 return -EINVAL;
1298             }
1299             tile_flags |= RADEON_DST_TILE_MICRO;
1300         }
1301 
1302         tmp |= tile_flags;
1303         p->ib.ptr[idx] = (value & 0x3fc00000) | tmp;
1304     } else
1305         p->ib.ptr[idx] = (value & 0xffc00000) | tmp;
1306     return 0;
1307 }
1308 
1309 int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
1310                  struct radeon_cs_packet *pkt,
1311                  int idx)
1312 {
1313     unsigned c, i;
1314     struct radeon_bo_list *reloc;
1315     struct r100_cs_track *track;
1316     int r = 0;
1317     volatile uint32_t *ib;
1318     u32 idx_value;
1319 
1320     ib = p->ib.ptr;
1321     track = (struct r100_cs_track *)p->track;
1322     c = radeon_get_ib_value(p, idx++) & 0x1F;
1323     if (c > 16) {
1324         DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
1325               pkt->opcode);
1326         radeon_cs_dump_packet(p, pkt);
1327         return -EINVAL;
1328     }
1329     track->num_arrays = c;
1330     for (i = 0; i < (c - 1); i+=2, idx+=3) {
1331         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1332         if (r) {
1333             DRM_ERROR("No reloc for packet3 %d\n",
1334                   pkt->opcode);
1335             radeon_cs_dump_packet(p, pkt);
1336             return r;
1337         }
1338         idx_value = radeon_get_ib_value(p, idx);
1339         ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);
1340 
1341         track->arrays[i + 0].esize = idx_value >> 8;
1342         track->arrays[i + 0].robj = reloc->robj;
1343         track->arrays[i + 0].esize &= 0x7F;
1344         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1345         if (r) {
1346             DRM_ERROR("No reloc for packet3 %d\n",
1347                   pkt->opcode);
1348             radeon_cs_dump_packet(p, pkt);
1349             return r;
1350         }
1351         ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->gpu_offset);
1352         track->arrays[i + 1].robj = reloc->robj;
1353         track->arrays[i + 1].esize = idx_value >> 24;
1354         track->arrays[i + 1].esize &= 0x7F;
1355     }
1356     if (c & 1) {
1357         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1358         if (r) {
1359             DRM_ERROR("No reloc for packet3 %d\n",
1360                       pkt->opcode);
1361             radeon_cs_dump_packet(p, pkt);
1362             return r;
1363         }
1364         idx_value = radeon_get_ib_value(p, idx);
1365         ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);
1366         track->arrays[i + 0].robj = reloc->robj;
1367         track->arrays[i + 0].esize = idx_value >> 8;
1368         track->arrays[i + 0].esize &= 0x7F;
1369     }
1370     return r;
1371 }
1372 
1373 int r100_cs_parse_packet0(struct radeon_cs_parser *p,
1374               struct radeon_cs_packet *pkt,
1375               const unsigned *auth, unsigned n,
1376               radeon_packet0_check_t check)
1377 {
1378     unsigned reg;
1379     unsigned i, j, m;
1380     unsigned idx;
1381     int r;
1382 
1383     idx = pkt->idx + 1;
1384     reg = pkt->reg;
1385     /* Check that register fall into register range
1386      * determined by the number of entry (n) in the
1387      * safe register bitmap.
1388      */
1389     if (pkt->one_reg_wr) {
1390         if ((reg >> 7) > n) {
1391             return -EINVAL;
1392         }
1393     } else {
1394         if (((reg + (pkt->count << 2)) >> 7) > n) {
1395             return -EINVAL;
1396         }
1397     }
1398     for (i = 0; i <= pkt->count; i++, idx++) {
1399         j = (reg >> 7);
1400         m = 1 << ((reg >> 2) & 31);
1401         if (auth[j] & m) {
1402             r = check(p, pkt, idx, reg);
1403             if (r) {
1404                 return r;
1405             }
1406         }
1407         if (pkt->one_reg_wr) {
1408             if (!(auth[j] & m)) {
1409                 break;
1410             }
1411         } else {
1412             reg += 4;
1413         }
1414     }
1415     return 0;
1416 }
1417 
1418 /**
1419  * r100_cs_packet_parse_vline() - parse userspace VLINE packet
1420  * @p:      parser structure holding parsing context.
1421  *
1422  * Userspace sends a special sequence for VLINE waits.
1423  * PACKET0 - VLINE_START_END + value
1424  * PACKET0 - WAIT_UNTIL +_value
1425  * RELOC (P3) - crtc_id in reloc.
1426  *
1427  * This function parses this and relocates the VLINE START END
1428  * and WAIT UNTIL packets to the correct crtc.
1429  * It also detects a switched off crtc and nulls out the
1430  * wait in that case.
1431  */
1432 int r100_cs_packet_parse_vline(struct radeon_cs_parser *p)
1433 {
1434     struct drm_crtc *crtc;
1435     struct radeon_crtc *radeon_crtc;
1436     struct radeon_cs_packet p3reloc, waitreloc;
1437     int crtc_id;
1438     int r;
1439     uint32_t header, h_idx, reg;
1440     volatile uint32_t *ib;
1441 
1442     ib = p->ib.ptr;
1443 
1444     /* parse the wait until */
1445     r = radeon_cs_packet_parse(p, &waitreloc, p->idx);
1446     if (r)
1447         return r;
1448 
1449     /* check its a wait until and only 1 count */
1450     if (waitreloc.reg != RADEON_WAIT_UNTIL ||
1451         waitreloc.count != 0) {
1452         DRM_ERROR("vline wait had illegal wait until segment\n");
1453         return -EINVAL;
1454     }
1455 
1456     if (radeon_get_ib_value(p, waitreloc.idx + 1) != RADEON_WAIT_CRTC_VLINE) {
1457         DRM_ERROR("vline wait had illegal wait until\n");
1458         return -EINVAL;
1459     }
1460 
1461     /* jump over the NOP */
1462     r = radeon_cs_packet_parse(p, &p3reloc, p->idx + waitreloc.count + 2);
1463     if (r)
1464         return r;
1465 
1466     h_idx = p->idx - 2;
1467     p->idx += waitreloc.count + 2;
1468     p->idx += p3reloc.count + 2;
1469 
1470     header = radeon_get_ib_value(p, h_idx);
1471     crtc_id = radeon_get_ib_value(p, h_idx + 5);
1472     reg = R100_CP_PACKET0_GET_REG(header);
1473     crtc = drm_crtc_find(p->rdev->ddev, p->filp, crtc_id);
1474     if (!crtc) {
1475         DRM_ERROR("cannot find crtc %d\n", crtc_id);
1476         return -ENOENT;
1477     }
1478     radeon_crtc = to_radeon_crtc(crtc);
1479     crtc_id = radeon_crtc->crtc_id;
1480 
1481     if (!crtc->enabled) {
1482         /* if the CRTC isn't enabled - we need to nop out the wait until */
1483         ib[h_idx + 2] = PACKET2(0);
1484         ib[h_idx + 3] = PACKET2(0);
1485     } else if (crtc_id == 1) {
1486         switch (reg) {
1487         case AVIVO_D1MODE_VLINE_START_END:
1488             header &= ~R300_CP_PACKET0_REG_MASK;
1489             header |= AVIVO_D2MODE_VLINE_START_END >> 2;
1490             break;
1491         case RADEON_CRTC_GUI_TRIG_VLINE:
1492             header &= ~R300_CP_PACKET0_REG_MASK;
1493             header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2;
1494             break;
1495         default:
1496             DRM_ERROR("unknown crtc reloc\n");
1497             return -EINVAL;
1498         }
1499         ib[h_idx] = header;
1500         ib[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1;
1501     }
1502 
1503     return 0;
1504 }
1505 
1506 static int r100_get_vtx_size(uint32_t vtx_fmt)
1507 {
1508     int vtx_size;
1509     vtx_size = 2;
1510     /* ordered according to bits in spec */
1511     if (vtx_fmt & RADEON_SE_VTX_FMT_W0)
1512         vtx_size++;
1513     if (vtx_fmt & RADEON_SE_VTX_FMT_FPCOLOR)
1514         vtx_size += 3;
1515     if (vtx_fmt & RADEON_SE_VTX_FMT_FPALPHA)
1516         vtx_size++;
1517     if (vtx_fmt & RADEON_SE_VTX_FMT_PKCOLOR)
1518         vtx_size++;
1519     if (vtx_fmt & RADEON_SE_VTX_FMT_FPSPEC)
1520         vtx_size += 3;
1521     if (vtx_fmt & RADEON_SE_VTX_FMT_FPFOG)
1522         vtx_size++;
1523     if (vtx_fmt & RADEON_SE_VTX_FMT_PKSPEC)
1524         vtx_size++;
1525     if (vtx_fmt & RADEON_SE_VTX_FMT_ST0)
1526         vtx_size += 2;
1527     if (vtx_fmt & RADEON_SE_VTX_FMT_ST1)
1528         vtx_size += 2;
1529     if (vtx_fmt & RADEON_SE_VTX_FMT_Q1)
1530         vtx_size++;
1531     if (vtx_fmt & RADEON_SE_VTX_FMT_ST2)
1532         vtx_size += 2;
1533     if (vtx_fmt & RADEON_SE_VTX_FMT_Q2)
1534         vtx_size++;
1535     if (vtx_fmt & RADEON_SE_VTX_FMT_ST3)
1536         vtx_size += 2;
1537     if (vtx_fmt & RADEON_SE_VTX_FMT_Q3)
1538         vtx_size++;
1539     if (vtx_fmt & RADEON_SE_VTX_FMT_Q0)
1540         vtx_size++;
1541     /* blend weight */
1542     if (vtx_fmt & (0x7 << 15))
1543         vtx_size += (vtx_fmt >> 15) & 0x7;
1544     if (vtx_fmt & RADEON_SE_VTX_FMT_N0)
1545         vtx_size += 3;
1546     if (vtx_fmt & RADEON_SE_VTX_FMT_XY1)
1547         vtx_size += 2;
1548     if (vtx_fmt & RADEON_SE_VTX_FMT_Z1)
1549         vtx_size++;
1550     if (vtx_fmt & RADEON_SE_VTX_FMT_W1)
1551         vtx_size++;
1552     if (vtx_fmt & RADEON_SE_VTX_FMT_N1)
1553         vtx_size++;
1554     if (vtx_fmt & RADEON_SE_VTX_FMT_Z)
1555         vtx_size++;
1556     return vtx_size;
1557 }
1558 
1559 static int r100_packet0_check(struct radeon_cs_parser *p,
1560                   struct radeon_cs_packet *pkt,
1561                   unsigned idx, unsigned reg)
1562 {
1563     struct radeon_bo_list *reloc;
1564     struct r100_cs_track *track;
1565     volatile uint32_t *ib;
1566     uint32_t tmp;
1567     int r;
1568     int i, face;
1569     u32 tile_flags = 0;
1570     u32 idx_value;
1571 
1572     ib = p->ib.ptr;
1573     track = (struct r100_cs_track *)p->track;
1574 
1575     idx_value = radeon_get_ib_value(p, idx);
1576 
1577     switch (reg) {
1578     case RADEON_CRTC_GUI_TRIG_VLINE:
1579         r = r100_cs_packet_parse_vline(p);
1580         if (r) {
1581             DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1582                   idx, reg);
1583             radeon_cs_dump_packet(p, pkt);
1584             return r;
1585         }
1586         break;
1587         /* FIXME: only allow PACKET3 blit? easier to check for out of
1588          * range access */
1589     case RADEON_DST_PITCH_OFFSET:
1590     case RADEON_SRC_PITCH_OFFSET:
1591         r = r100_reloc_pitch_offset(p, pkt, idx, reg);
1592         if (r)
1593             return r;
1594         break;
1595     case RADEON_RB3D_DEPTHOFFSET:
1596         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1597         if (r) {
1598             DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1599                   idx, reg);
1600             radeon_cs_dump_packet(p, pkt);
1601             return r;
1602         }
1603         track->zb.robj = reloc->robj;
1604         track->zb.offset = idx_value;
1605         track->zb_dirty = true;
1606         ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1607         break;
1608     case RADEON_RB3D_COLOROFFSET:
1609         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1610         if (r) {
1611             DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1612                   idx, reg);
1613             radeon_cs_dump_packet(p, pkt);
1614             return r;
1615         }
1616         track->cb[0].robj = reloc->robj;
1617         track->cb[0].offset = idx_value;
1618         track->cb_dirty = true;
1619         ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1620         break;
1621     case RADEON_PP_TXOFFSET_0:
1622     case RADEON_PP_TXOFFSET_1:
1623     case RADEON_PP_TXOFFSET_2:
1624         i = (reg - RADEON_PP_TXOFFSET_0) / 24;
1625         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1626         if (r) {
1627             DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1628                   idx, reg);
1629             radeon_cs_dump_packet(p, pkt);
1630             return r;
1631         }
1632         if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1633             if (reloc->tiling_flags & RADEON_TILING_MACRO)
1634                 tile_flags |= RADEON_TXO_MACRO_TILE;
1635             if (reloc->tiling_flags & RADEON_TILING_MICRO)
1636                 tile_flags |= RADEON_TXO_MICRO_TILE_X2;
1637 
1638             tmp = idx_value & ~(0x7 << 2);
1639             tmp |= tile_flags;
1640             ib[idx] = tmp + ((u32)reloc->gpu_offset);
1641         } else
1642             ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1643         track->textures[i].robj = reloc->robj;
1644         track->tex_dirty = true;
1645         break;
1646     case RADEON_PP_CUBIC_OFFSET_T0_0:
1647     case RADEON_PP_CUBIC_OFFSET_T0_1:
1648     case RADEON_PP_CUBIC_OFFSET_T0_2:
1649     case RADEON_PP_CUBIC_OFFSET_T0_3:
1650     case RADEON_PP_CUBIC_OFFSET_T0_4:
1651         i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4;
1652         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1653         if (r) {
1654             DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1655                   idx, reg);
1656             radeon_cs_dump_packet(p, pkt);
1657             return r;
1658         }
1659         track->textures[0].cube_info[i].offset = idx_value;
1660         ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1661         track->textures[0].cube_info[i].robj = reloc->robj;
1662         track->tex_dirty = true;
1663         break;
1664     case RADEON_PP_CUBIC_OFFSET_T1_0:
1665     case RADEON_PP_CUBIC_OFFSET_T1_1:
1666     case RADEON_PP_CUBIC_OFFSET_T1_2:
1667     case RADEON_PP_CUBIC_OFFSET_T1_3:
1668     case RADEON_PP_CUBIC_OFFSET_T1_4:
1669         i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4;
1670         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1671         if (r) {
1672             DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1673                   idx, reg);
1674             radeon_cs_dump_packet(p, pkt);
1675             return r;
1676         }
1677         track->textures[1].cube_info[i].offset = idx_value;
1678         ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1679         track->textures[1].cube_info[i].robj = reloc->robj;
1680         track->tex_dirty = true;
1681         break;
1682     case RADEON_PP_CUBIC_OFFSET_T2_0:
1683     case RADEON_PP_CUBIC_OFFSET_T2_1:
1684     case RADEON_PP_CUBIC_OFFSET_T2_2:
1685     case RADEON_PP_CUBIC_OFFSET_T2_3:
1686     case RADEON_PP_CUBIC_OFFSET_T2_4:
1687         i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4;
1688         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1689         if (r) {
1690             DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1691                   idx, reg);
1692             radeon_cs_dump_packet(p, pkt);
1693             return r;
1694         }
1695         track->textures[2].cube_info[i].offset = idx_value;
1696         ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1697         track->textures[2].cube_info[i].robj = reloc->robj;
1698         track->tex_dirty = true;
1699         break;
1700     case RADEON_RE_WIDTH_HEIGHT:
1701         track->maxy = ((idx_value >> 16) & 0x7FF);
1702         track->cb_dirty = true;
1703         track->zb_dirty = true;
1704         break;
1705     case RADEON_RB3D_COLORPITCH:
1706         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1707         if (r) {
1708             DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1709                   idx, reg);
1710             radeon_cs_dump_packet(p, pkt);
1711             return r;
1712         }
1713         if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1714             if (reloc->tiling_flags & RADEON_TILING_MACRO)
1715                 tile_flags |= RADEON_COLOR_TILE_ENABLE;
1716             if (reloc->tiling_flags & RADEON_TILING_MICRO)
1717                 tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;
1718 
1719             tmp = idx_value & ~(0x7 << 16);
1720             tmp |= tile_flags;
1721             ib[idx] = tmp;
1722         } else
1723             ib[idx] = idx_value;
1724 
1725         track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
1726         track->cb_dirty = true;
1727         break;
1728     case RADEON_RB3D_DEPTHPITCH:
1729         track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
1730         track->zb_dirty = true;
1731         break;
1732     case RADEON_RB3D_CNTL:
1733         switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
1734         case 7:
1735         case 8:
1736         case 9:
1737         case 11:
1738         case 12:
1739             track->cb[0].cpp = 1;
1740             break;
1741         case 3:
1742         case 4:
1743         case 15:
1744             track->cb[0].cpp = 2;
1745             break;
1746         case 6:
1747             track->cb[0].cpp = 4;
1748             break;
1749         default:
1750             DRM_ERROR("Invalid color buffer format (%d) !\n",
1751                   ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f));
1752             return -EINVAL;
1753         }
1754         track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
1755         track->cb_dirty = true;
1756         track->zb_dirty = true;
1757         break;
1758     case RADEON_RB3D_ZSTENCILCNTL:
1759         switch (idx_value & 0xf) {
1760         case 0:
1761             track->zb.cpp = 2;
1762             break;
1763         case 2:
1764         case 3:
1765         case 4:
1766         case 5:
1767         case 9:
1768         case 11:
1769             track->zb.cpp = 4;
1770             break;
1771         default:
1772             break;
1773         }
1774         track->zb_dirty = true;
1775         break;
1776     case RADEON_RB3D_ZPASS_ADDR:
1777         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1778         if (r) {
1779             DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1780                   idx, reg);
1781             radeon_cs_dump_packet(p, pkt);
1782             return r;
1783         }
1784         ib[idx] = idx_value + ((u32)reloc->gpu_offset);
1785         break;
1786     case RADEON_PP_CNTL:
1787         {
1788             uint32_t temp = idx_value >> 4;
1789             for (i = 0; i < track->num_texture; i++)
1790                 track->textures[i].enabled = !!(temp & (1 << i));
1791             track->tex_dirty = true;
1792         }
1793         break;
1794     case RADEON_SE_VF_CNTL:
1795         track->vap_vf_cntl = idx_value;
1796         break;
1797     case RADEON_SE_VTX_FMT:
1798         track->vtx_size = r100_get_vtx_size(idx_value);
1799         break;
1800     case RADEON_PP_TEX_SIZE_0:
1801     case RADEON_PP_TEX_SIZE_1:
1802     case RADEON_PP_TEX_SIZE_2:
1803         i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
1804         track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
1805         track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
1806         track->tex_dirty = true;
1807         break;
1808     case RADEON_PP_TEX_PITCH_0:
1809     case RADEON_PP_TEX_PITCH_1:
1810     case RADEON_PP_TEX_PITCH_2:
1811         i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
1812         track->textures[i].pitch = idx_value + 32;
1813         track->tex_dirty = true;
1814         break;
1815     case RADEON_PP_TXFILTER_0:
1816     case RADEON_PP_TXFILTER_1:
1817     case RADEON_PP_TXFILTER_2:
1818         i = (reg - RADEON_PP_TXFILTER_0) / 24;
1819         track->textures[i].num_levels = ((idx_value & RADEON_MAX_MIP_LEVEL_MASK)
1820                          >> RADEON_MAX_MIP_LEVEL_SHIFT);
1821         tmp = (idx_value >> 23) & 0x7;
1822         if (tmp == 2 || tmp == 6)
1823             track->textures[i].roundup_w = false;
1824         tmp = (idx_value >> 27) & 0x7;
1825         if (tmp == 2 || tmp == 6)
1826             track->textures[i].roundup_h = false;
1827         track->tex_dirty = true;
1828         break;
1829     case RADEON_PP_TXFORMAT_0:
1830     case RADEON_PP_TXFORMAT_1:
1831     case RADEON_PP_TXFORMAT_2:
1832         i = (reg - RADEON_PP_TXFORMAT_0) / 24;
1833         if (idx_value & RADEON_TXFORMAT_NON_POWER2) {
1834             track->textures[i].use_pitch = true;
1835         } else {
1836             track->textures[i].use_pitch = false;
1837             track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT);
1838             track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT);
1839         }
1840         if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE)
1841             track->textures[i].tex_coord_type = 2;
1842         switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) {
1843         case RADEON_TXFORMAT_I8:
1844         case RADEON_TXFORMAT_RGB332:
1845         case RADEON_TXFORMAT_Y8:
1846             track->textures[i].cpp = 1;
1847             track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1848             break;
1849         case RADEON_TXFORMAT_AI88:
1850         case RADEON_TXFORMAT_ARGB1555:
1851         case RADEON_TXFORMAT_RGB565:
1852         case RADEON_TXFORMAT_ARGB4444:
1853         case RADEON_TXFORMAT_VYUY422:
1854         case RADEON_TXFORMAT_YVYU422:
1855         case RADEON_TXFORMAT_SHADOW16:
1856         case RADEON_TXFORMAT_LDUDV655:
1857         case RADEON_TXFORMAT_DUDV88:
1858             track->textures[i].cpp = 2;
1859             track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1860             break;
1861         case RADEON_TXFORMAT_ARGB8888:
1862         case RADEON_TXFORMAT_RGBA8888:
1863         case RADEON_TXFORMAT_SHADOW32:
1864         case RADEON_TXFORMAT_LDUDUV8888:
1865             track->textures[i].cpp = 4;
1866             track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1867             break;
1868         case RADEON_TXFORMAT_DXT1:
1869             track->textures[i].cpp = 1;
1870             track->textures[i].compress_format = R100_TRACK_COMP_DXT1;
1871             break;
1872         case RADEON_TXFORMAT_DXT23:
1873         case RADEON_TXFORMAT_DXT45:
1874             track->textures[i].cpp = 1;
1875             track->textures[i].compress_format = R100_TRACK_COMP_DXT35;
1876             break;
1877         }
1878         track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
1879         track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
1880         track->tex_dirty = true;
1881         break;
1882     case RADEON_PP_CUBIC_FACES_0:
1883     case RADEON_PP_CUBIC_FACES_1:
1884     case RADEON_PP_CUBIC_FACES_2:
1885         tmp = idx_value;
1886         i = (reg - RADEON_PP_CUBIC_FACES_0) / 4;
1887         for (face = 0; face < 4; face++) {
1888             track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
1889             track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
1890         }
1891         track->tex_dirty = true;
1892         break;
1893     default:
1894         pr_err("Forbidden register 0x%04X in cs at %d\n", reg, idx);
1895         return -EINVAL;
1896     }
1897     return 0;
1898 }
1899 
1900 int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
1901                      struct radeon_cs_packet *pkt,
1902                      struct radeon_bo *robj)
1903 {
1904     unsigned idx;
1905     u32 value;
1906     idx = pkt->idx + 1;
1907     value = radeon_get_ib_value(p, idx + 2);
1908     if ((value + 1) > radeon_bo_size(robj)) {
1909         DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
1910               "(need %u have %lu) !\n",
1911               value + 1,
1912               radeon_bo_size(robj));
1913         return -EINVAL;
1914     }
1915     return 0;
1916 }
1917 
1918 static int r100_packet3_check(struct radeon_cs_parser *p,
1919                   struct radeon_cs_packet *pkt)
1920 {
1921     struct radeon_bo_list *reloc;
1922     struct r100_cs_track *track;
1923     unsigned idx;
1924     volatile uint32_t *ib;
1925     int r;
1926 
1927     ib = p->ib.ptr;
1928     idx = pkt->idx + 1;
1929     track = (struct r100_cs_track *)p->track;
1930     switch (pkt->opcode) {
1931     case PACKET3_3D_LOAD_VBPNTR:
1932         r = r100_packet3_load_vbpntr(p, pkt, idx);
1933         if (r)
1934             return r;
1935         break;
1936     case PACKET3_INDX_BUFFER:
1937         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1938         if (r) {
1939             DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1940             radeon_cs_dump_packet(p, pkt);
1941             return r;
1942         }
1943         ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->gpu_offset);
1944         r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
1945         if (r) {
1946             return r;
1947         }
1948         break;
1949     case 0x23:
1950         /* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
1951         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1952         if (r) {
1953             DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1954             radeon_cs_dump_packet(p, pkt);
1955             return r;
1956         }
1957         ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->gpu_offset);
1958         track->num_arrays = 1;
1959         track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2));
1960 
1961         track->arrays[0].robj = reloc->robj;
1962         track->arrays[0].esize = track->vtx_size;
1963 
1964         track->max_indx = radeon_get_ib_value(p, idx+1);
1965 
1966         track->vap_vf_cntl = radeon_get_ib_value(p, idx+3);
1967         track->immd_dwords = pkt->count - 1;
1968         r = r100_cs_track_check(p->rdev, track);
1969         if (r)
1970             return r;
1971         break;
1972     case PACKET3_3D_DRAW_IMMD:
1973         if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) {
1974             DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1975             return -EINVAL;
1976         }
1977         track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 0));
1978         track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
1979         track->immd_dwords = pkt->count - 1;
1980         r = r100_cs_track_check(p->rdev, track);
1981         if (r)
1982             return r;
1983         break;
1984         /* triggers drawing using in-packet vertex data */
1985     case PACKET3_3D_DRAW_IMMD_2:
1986         if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) {
1987             DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1988             return -EINVAL;
1989         }
1990         track->vap_vf_cntl = radeon_get_ib_value(p, idx);
1991         track->immd_dwords = pkt->count;
1992         r = r100_cs_track_check(p->rdev, track);
1993         if (r)
1994             return r;
1995         break;
1996         /* triggers drawing using in-packet vertex data */
1997     case PACKET3_3D_DRAW_VBUF_2:
1998         track->vap_vf_cntl = radeon_get_ib_value(p, idx);
1999         r = r100_cs_track_check(p->rdev, track);
2000         if (r)
2001             return r;
2002         break;
2003         /* triggers drawing of vertex buffers setup elsewhere */
2004     case PACKET3_3D_DRAW_INDX_2:
2005         track->vap_vf_cntl = radeon_get_ib_value(p, idx);
2006         r = r100_cs_track_check(p->rdev, track);
2007         if (r)
2008             return r;
2009         break;
2010         /* triggers drawing using indices to vertex buffer */
2011     case PACKET3_3D_DRAW_VBUF:
2012         track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
2013         r = r100_cs_track_check(p->rdev, track);
2014         if (r)
2015             return r;
2016         break;
2017         /* triggers drawing of vertex buffers setup elsewhere */
2018     case PACKET3_3D_DRAW_INDX:
2019         track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
2020         r = r100_cs_track_check(p->rdev, track);
2021         if (r)
2022             return r;
2023         break;
2024         /* triggers drawing using indices to vertex buffer */
2025     case PACKET3_3D_CLEAR_HIZ:
2026     case PACKET3_3D_CLEAR_ZMASK:
2027         if (p->rdev->hyperz_filp != p->filp)
2028             return -EINVAL;
2029         break;
2030     case PACKET3_NOP:
2031         break;
2032     default:
2033         DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2034         return -EINVAL;
2035     }
2036     return 0;
2037 }
2038 
2039 int r100_cs_parse(struct radeon_cs_parser *p)
2040 {
2041     struct radeon_cs_packet pkt;
2042     struct r100_cs_track *track;
2043     int r;
2044 
2045     track = kzalloc(sizeof(*track), GFP_KERNEL);
2046     if (!track)
2047         return -ENOMEM;
2048     r100_cs_track_clear(p->rdev, track);
2049     p->track = track;
2050     do {
2051         r = radeon_cs_packet_parse(p, &pkt, p->idx);
2052         if (r) {
2053             return r;
2054         }
2055         p->idx += pkt.count + 2;
2056         switch (pkt.type) {
2057         case RADEON_PACKET_TYPE0:
2058             if (p->rdev->family >= CHIP_R200)
2059                 r = r100_cs_parse_packet0(p, &pkt,
2060                     p->rdev->config.r100.reg_safe_bm,
2061                     p->rdev->config.r100.reg_safe_bm_size,
2062                     &r200_packet0_check);
2063             else
2064                 r = r100_cs_parse_packet0(p, &pkt,
2065                     p->rdev->config.r100.reg_safe_bm,
2066                     p->rdev->config.r100.reg_safe_bm_size,
2067                     &r100_packet0_check);
2068             break;
2069         case RADEON_PACKET_TYPE2:
2070             break;
2071         case RADEON_PACKET_TYPE3:
2072             r = r100_packet3_check(p, &pkt);
2073             break;
2074         default:
2075             DRM_ERROR("Unknown packet type %d !\n",
2076                   pkt.type);
2077             return -EINVAL;
2078         }
2079         if (r)
2080             return r;
2081     } while (p->idx < p->chunk_ib->length_dw);
2082     return 0;
2083 }
2084 
2085 static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
2086 {
2087     DRM_ERROR("pitch                      %d\n", t->pitch);
2088     DRM_ERROR("use_pitch                  %d\n", t->use_pitch);
2089     DRM_ERROR("width                      %d\n", t->width);
2090     DRM_ERROR("width_11                   %d\n", t->width_11);
2091     DRM_ERROR("height                     %d\n", t->height);
2092     DRM_ERROR("height_11                  %d\n", t->height_11);
2093     DRM_ERROR("num levels                 %d\n", t->num_levels);
2094     DRM_ERROR("depth                      %d\n", t->txdepth);
2095     DRM_ERROR("bpp                        %d\n", t->cpp);
2096     DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
2097     DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
2098     DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
2099     DRM_ERROR("compress format            %d\n", t->compress_format);
2100 }
2101 
2102 static int r100_track_compress_size(int compress_format, int w, int h)
2103 {
2104     int block_width, block_height, block_bytes;
2105     int wblocks, hblocks;
2106     int min_wblocks;
2107     int sz;
2108 
2109     block_width = 4;
2110     block_height = 4;
2111 
2112     switch (compress_format) {
2113     case R100_TRACK_COMP_DXT1:
2114         block_bytes = 8;
2115         min_wblocks = 4;
2116         break;
2117     default:
2118     case R100_TRACK_COMP_DXT35:
2119         block_bytes = 16;
2120         min_wblocks = 2;
2121         break;
2122     }
2123 
2124     hblocks = (h + block_height - 1) / block_height;
2125     wblocks = (w + block_width - 1) / block_width;
2126     if (wblocks < min_wblocks)
2127         wblocks = min_wblocks;
2128     sz = wblocks * hblocks * block_bytes;
2129     return sz;
2130 }
2131 
2132 static int r100_cs_track_cube(struct radeon_device *rdev,
2133                   struct r100_cs_track *track, unsigned idx)
2134 {
2135     unsigned face, w, h;
2136     struct radeon_bo *cube_robj;
2137     unsigned long size;
2138     unsigned compress_format = track->textures[idx].compress_format;
2139 
2140     for (face = 0; face < 5; face++) {
2141         cube_robj = track->textures[idx].cube_info[face].robj;
2142         w = track->textures[idx].cube_info[face].width;
2143         h = track->textures[idx].cube_info[face].height;
2144 
2145         if (compress_format) {
2146             size = r100_track_compress_size(compress_format, w, h);
2147         } else
2148             size = w * h;
2149         size *= track->textures[idx].cpp;
2150 
2151         size += track->textures[idx].cube_info[face].offset;
2152 
2153         if (size > radeon_bo_size(cube_robj)) {
2154             DRM_ERROR("Cube texture offset greater than object size %lu %lu\n",
2155                   size, radeon_bo_size(cube_robj));
2156             r100_cs_track_texture_print(&track->textures[idx]);
2157             return -1;
2158         }
2159     }
2160     return 0;
2161 }
2162 
2163 static int r100_cs_track_texture_check(struct radeon_device *rdev,
2164                        struct r100_cs_track *track)
2165 {
2166     struct radeon_bo *robj;
2167     unsigned long size;
2168     unsigned u, i, w, h, d;
2169     int ret;
2170 
2171     for (u = 0; u < track->num_texture; u++) {
2172         if (!track->textures[u].enabled)
2173             continue;
2174         if (track->textures[u].lookup_disable)
2175             continue;
2176         robj = track->textures[u].robj;
2177         if (robj == NULL) {
2178             DRM_ERROR("No texture bound to unit %u\n", u);
2179             return -EINVAL;
2180         }
2181         size = 0;
2182         for (i = 0; i <= track->textures[u].num_levels; i++) {
2183             if (track->textures[u].use_pitch) {
2184                 if (rdev->family < CHIP_R300)
2185                     w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i);
2186                 else
2187                     w = track->textures[u].pitch / (1 << i);
2188             } else {
2189                 w = track->textures[u].width;
2190                 if (rdev->family >= CHIP_RV515)
2191                     w |= track->textures[u].width_11;
2192                 w = w / (1 << i);
2193                 if (track->textures[u].roundup_w)
2194                     w = roundup_pow_of_two(w);
2195             }
2196             h = track->textures[u].height;
2197             if (rdev->family >= CHIP_RV515)
2198                 h |= track->textures[u].height_11;
2199             h = h / (1 << i);
2200             if (track->textures[u].roundup_h)
2201                 h = roundup_pow_of_two(h);
2202             if (track->textures[u].tex_coord_type == 1) {
2203                 d = (1 << track->textures[u].txdepth) / (1 << i);
2204                 if (!d)
2205                     d = 1;
2206             } else {
2207                 d = 1;
2208             }
2209             if (track->textures[u].compress_format) {
2210 
2211                 size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d;
2212                 /* compressed textures are block based */
2213             } else
2214                 size += w * h * d;
2215         }
2216         size *= track->textures[u].cpp;
2217 
2218         switch (track->textures[u].tex_coord_type) {
2219         case 0:
2220         case 1:
2221             break;
2222         case 2:
2223             if (track->separate_cube) {
2224                 ret = r100_cs_track_cube(rdev, track, u);
2225                 if (ret)
2226                     return ret;
2227             } else
2228                 size *= 6;
2229             break;
2230         default:
2231             DRM_ERROR("Invalid texture coordinate type %u for unit "
2232                   "%u\n", track->textures[u].tex_coord_type, u);
2233             return -EINVAL;
2234         }
2235         if (size > radeon_bo_size(robj)) {
2236             DRM_ERROR("Texture of unit %u needs %lu bytes but is "
2237                   "%lu\n", u, size, radeon_bo_size(robj));
2238             r100_cs_track_texture_print(&track->textures[u]);
2239             return -EINVAL;
2240         }
2241     }
2242     return 0;
2243 }
2244 
2245 int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
2246 {
2247     unsigned i;
2248     unsigned long size;
2249     unsigned prim_walk;
2250     unsigned nverts;
2251     unsigned num_cb = track->cb_dirty ? track->num_cb : 0;
2252 
2253     if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
2254         !track->blend_read_enable)
2255         num_cb = 0;
2256 
2257     for (i = 0; i < num_cb; i++) {
2258         if (track->cb[i].robj == NULL) {
2259             DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
2260             return -EINVAL;
2261         }
2262         size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
2263         size += track->cb[i].offset;
2264         if (size > radeon_bo_size(track->cb[i].robj)) {
2265             DRM_ERROR("[drm] Buffer too small for color buffer %d "
2266                   "(need %lu have %lu) !\n", i, size,
2267                   radeon_bo_size(track->cb[i].robj));
2268             DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
2269                   i, track->cb[i].pitch, track->cb[i].cpp,
2270                   track->cb[i].offset, track->maxy);
2271             return -EINVAL;
2272         }
2273     }
2274     track->cb_dirty = false;
2275 
2276     if (track->zb_dirty && track->z_enabled) {
2277         if (track->zb.robj == NULL) {
2278             DRM_ERROR("[drm] No buffer for z buffer !\n");
2279             return -EINVAL;
2280         }
2281         size = track->zb.pitch * track->zb.cpp * track->maxy;
2282         size += track->zb.offset;
2283         if (size > radeon_bo_size(track->zb.robj)) {
2284             DRM_ERROR("[drm] Buffer too small for z buffer "
2285                   "(need %lu have %lu) !\n", size,
2286                   radeon_bo_size(track->zb.robj));
2287             DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n",
2288                   track->zb.pitch, track->zb.cpp,
2289                   track->zb.offset, track->maxy);
2290             return -EINVAL;
2291         }
2292     }
2293     track->zb_dirty = false;
2294 
2295     if (track->aa_dirty && track->aaresolve) {
2296         if (track->aa.robj == NULL) {
2297             DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i);
2298             return -EINVAL;
2299         }
2300         /* I believe the format comes from colorbuffer0. */
2301         size = track->aa.pitch * track->cb[0].cpp * track->maxy;
2302         size += track->aa.offset;
2303         if (size > radeon_bo_size(track->aa.robj)) {
2304             DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d "
2305                   "(need %lu have %lu) !\n", i, size,
2306                   radeon_bo_size(track->aa.robj));
2307             DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n",
2308                   i, track->aa.pitch, track->cb[0].cpp,
2309                   track->aa.offset, track->maxy);
2310             return -EINVAL;
2311         }
2312     }
2313     track->aa_dirty = false;
2314 
2315     prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
2316     if (track->vap_vf_cntl & (1 << 14)) {
2317         nverts = track->vap_alt_nverts;
2318     } else {
2319         nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
2320     }
2321     switch (prim_walk) {
2322     case 1:
2323         for (i = 0; i < track->num_arrays; i++) {
2324             size = track->arrays[i].esize * track->max_indx * 4;
2325             if (track->arrays[i].robj == NULL) {
2326                 DRM_ERROR("(PW %u) Vertex array %u no buffer "
2327                       "bound\n", prim_walk, i);
2328                 return -EINVAL;
2329             }
2330             if (size > radeon_bo_size(track->arrays[i].robj)) {
2331                 dev_err(rdev->dev, "(PW %u) Vertex array %u "
2332                     "need %lu dwords have %lu dwords\n",
2333                     prim_walk, i, size >> 2,
2334                     radeon_bo_size(track->arrays[i].robj)
2335                     >> 2);
2336                 DRM_ERROR("Max indices %u\n", track->max_indx);
2337                 return -EINVAL;
2338             }
2339         }
2340         break;
2341     case 2:
2342         for (i = 0; i < track->num_arrays; i++) {
2343             size = track->arrays[i].esize * (nverts - 1) * 4;
2344             if (track->arrays[i].robj == NULL) {
2345                 DRM_ERROR("(PW %u) Vertex array %u no buffer "
2346                       "bound\n", prim_walk, i);
2347                 return -EINVAL;
2348             }
2349             if (size > radeon_bo_size(track->arrays[i].robj)) {
2350                 dev_err(rdev->dev, "(PW %u) Vertex array %u "
2351                     "need %lu dwords have %lu dwords\n",
2352                     prim_walk, i, size >> 2,
2353                     radeon_bo_size(track->arrays[i].robj)
2354                     >> 2);
2355                 return -EINVAL;
2356             }
2357         }
2358         break;
2359     case 3:
2360         size = track->vtx_size * nverts;
2361         if (size != track->immd_dwords) {
2362             DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n",
2363                   track->immd_dwords, size);
2364             DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
2365                   nverts, track->vtx_size);
2366             return -EINVAL;
2367         }
2368         break;
2369     default:
2370         DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
2371               prim_walk);
2372         return -EINVAL;
2373     }
2374 
2375     if (track->tex_dirty) {
2376         track->tex_dirty = false;
2377         return r100_cs_track_texture_check(rdev, track);
2378     }
2379     return 0;
2380 }
2381 
2382 void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
2383 {
2384     unsigned i, face;
2385 
2386     track->cb_dirty = true;
2387     track->zb_dirty = true;
2388     track->tex_dirty = true;
2389     track->aa_dirty = true;
2390 
2391     if (rdev->family < CHIP_R300) {
2392         track->num_cb = 1;
2393         if (rdev->family <= CHIP_RS200)
2394             track->num_texture = 3;
2395         else
2396             track->num_texture = 6;
2397         track->maxy = 2048;
2398         track->separate_cube = true;
2399     } else {
2400         track->num_cb = 4;
2401         track->num_texture = 16;
2402         track->maxy = 4096;
2403         track->separate_cube = false;
2404         track->aaresolve = false;
2405         track->aa.robj = NULL;
2406     }
2407 
2408     for (i = 0; i < track->num_cb; i++) {
2409         track->cb[i].robj = NULL;
2410         track->cb[i].pitch = 8192;
2411         track->cb[i].cpp = 16;
2412         track->cb[i].offset = 0;
2413     }
2414     track->z_enabled = true;
2415     track->zb.robj = NULL;
2416     track->zb.pitch = 8192;
2417     track->zb.cpp = 4;
2418     track->zb.offset = 0;
2419     track->vtx_size = 0x7F;
2420     track->immd_dwords = 0xFFFFFFFFUL;
2421     track->num_arrays = 11;
2422     track->max_indx = 0x00FFFFFFUL;
2423     for (i = 0; i < track->num_arrays; i++) {
2424         track->arrays[i].robj = NULL;
2425         track->arrays[i].esize = 0x7F;
2426     }
2427     for (i = 0; i < track->num_texture; i++) {
2428         track->textures[i].compress_format = R100_TRACK_COMP_NONE;
2429         track->textures[i].pitch = 16536;
2430         track->textures[i].width = 16536;
2431         track->textures[i].height = 16536;
2432         track->textures[i].width_11 = 1 << 11;
2433         track->textures[i].height_11 = 1 << 11;
2434         track->textures[i].num_levels = 12;
2435         if (rdev->family <= CHIP_RS200) {
2436             track->textures[i].tex_coord_type = 0;
2437             track->textures[i].txdepth = 0;
2438         } else {
2439             track->textures[i].txdepth = 16;
2440             track->textures[i].tex_coord_type = 1;
2441         }
2442         track->textures[i].cpp = 64;
2443         track->textures[i].robj = NULL;
2444         /* CS IB emission code makes sure texture unit are disabled */
2445         track->textures[i].enabled = false;
2446         track->textures[i].lookup_disable = false;
2447         track->textures[i].roundup_w = true;
2448         track->textures[i].roundup_h = true;
2449         if (track->separate_cube)
2450             for (face = 0; face < 5; face++) {
2451                 track->textures[i].cube_info[face].robj = NULL;
2452                 track->textures[i].cube_info[face].width = 16536;
2453                 track->textures[i].cube_info[face].height = 16536;
2454                 track->textures[i].cube_info[face].offset = 0;
2455             }
2456     }
2457 }
2458 
2459 /*
2460  * Global GPU functions
2461  */
2462 static void r100_errata(struct radeon_device *rdev)
2463 {
2464     rdev->pll_errata = 0;
2465 
2466     if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
2467         rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
2468     }
2469 
2470     if (rdev->family == CHIP_RV100 ||
2471         rdev->family == CHIP_RS100 ||
2472         rdev->family == CHIP_RS200) {
2473         rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
2474     }
2475 }
2476 
2477 static int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
2478 {
2479     unsigned i;
2480     uint32_t tmp;
2481 
2482     for (i = 0; i < rdev->usec_timeout; i++) {
2483         tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
2484         if (tmp >= n) {
2485             return 0;
2486         }
2487         udelay(1);
2488     }
2489     return -1;
2490 }
2491 
2492 int r100_gui_wait_for_idle(struct radeon_device *rdev)
2493 {
2494     unsigned i;
2495     uint32_t tmp;
2496 
2497     if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
2498         pr_warn("radeon: wait for empty RBBM fifo failed! Bad things might happen.\n");
2499     }
2500     for (i = 0; i < rdev->usec_timeout; i++) {
2501         tmp = RREG32(RADEON_RBBM_STATUS);
2502         if (!(tmp & RADEON_RBBM_ACTIVE)) {
2503             return 0;
2504         }
2505         udelay(1);
2506     }
2507     return -1;
2508 }
2509 
2510 int r100_mc_wait_for_idle(struct radeon_device *rdev)
2511 {
2512     unsigned i;
2513     uint32_t tmp;
2514 
2515     for (i = 0; i < rdev->usec_timeout; i++) {
2516         /* read MC_STATUS */
2517         tmp = RREG32(RADEON_MC_STATUS);
2518         if (tmp & RADEON_MC_IDLE) {
2519             return 0;
2520         }
2521         udelay(1);
2522     }
2523     return -1;
2524 }
2525 
2526 bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2527 {
2528     u32 rbbm_status;
2529 
2530     rbbm_status = RREG32(R_000E40_RBBM_STATUS);
2531     if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
2532         radeon_ring_lockup_update(rdev, ring);
2533         return false;
2534     }
2535     return radeon_ring_test_lockup(rdev, ring);
2536 }
2537 
2538 /* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */
2539 void r100_enable_bm(struct radeon_device *rdev)
2540 {
2541     uint32_t tmp;
2542     /* Enable bus mastering */
2543     tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
2544     WREG32(RADEON_BUS_CNTL, tmp);
2545 }
2546 
2547 void r100_bm_disable(struct radeon_device *rdev)
2548 {
2549     u32 tmp;
2550 
2551     /* disable bus mastering */
2552     tmp = RREG32(R_000030_BUS_CNTL);
2553     WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000044);
2554     mdelay(1);
2555     WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000042);
2556     mdelay(1);
2557     WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040);
2558     tmp = RREG32(RADEON_BUS_CNTL);
2559     mdelay(1);
2560     pci_clear_master(rdev->pdev);
2561     mdelay(1);
2562 }
2563 
2564 int r100_asic_reset(struct radeon_device *rdev, bool hard)
2565 {
2566     struct r100_mc_save save;
2567     u32 status, tmp;
2568     int ret = 0;
2569 
2570     status = RREG32(R_000E40_RBBM_STATUS);
2571     if (!G_000E40_GUI_ACTIVE(status)) {
2572         return 0;
2573     }
2574     r100_mc_stop(rdev, &save);
2575     status = RREG32(R_000E40_RBBM_STATUS);
2576     dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2577     /* stop CP */
2578     WREG32(RADEON_CP_CSQ_CNTL, 0);
2579     tmp = RREG32(RADEON_CP_RB_CNTL);
2580     WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
2581     WREG32(RADEON_CP_RB_RPTR_WR, 0);
2582     WREG32(RADEON_CP_RB_WPTR, 0);
2583     WREG32(RADEON_CP_RB_CNTL, tmp);
2584     /* save PCI state */
2585     pci_save_state(rdev->pdev);
2586     /* disable bus mastering */
2587     r100_bm_disable(rdev);
2588     WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_SE(1) |
2589                     S_0000F0_SOFT_RESET_RE(1) |
2590                     S_0000F0_SOFT_RESET_PP(1) |
2591                     S_0000F0_SOFT_RESET_RB(1));
2592     RREG32(R_0000F0_RBBM_SOFT_RESET);
2593     mdelay(500);
2594     WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
2595     mdelay(1);
2596     status = RREG32(R_000E40_RBBM_STATUS);
2597     dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2598     /* reset CP */
2599     WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_CP(1));
2600     RREG32(R_0000F0_RBBM_SOFT_RESET);
2601     mdelay(500);
2602     WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
2603     mdelay(1);
2604     status = RREG32(R_000E40_RBBM_STATUS);
2605     dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2606     /* restore PCI & busmastering */
2607     pci_restore_state(rdev->pdev);
2608     r100_enable_bm(rdev);
2609     /* Check if GPU is idle */
2610     if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) ||
2611         G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) {
2612         dev_err(rdev->dev, "failed to reset GPU\n");
2613         ret = -1;
2614     } else
2615         dev_info(rdev->dev, "GPU reset succeed\n");
2616     r100_mc_resume(rdev, &save);
2617     return ret;
2618 }
2619 
2620 void r100_set_common_regs(struct radeon_device *rdev)
2621 {
2622     bool force_dac2 = false;
2623     u32 tmp;
2624 
2625     /* set these so they don't interfere with anything */
2626     WREG32(RADEON_OV0_SCALE_CNTL, 0);
2627     WREG32(RADEON_SUBPIC_CNTL, 0);
2628     WREG32(RADEON_VIPH_CONTROL, 0);
2629     WREG32(RADEON_I2C_CNTL_1, 0);
2630     WREG32(RADEON_DVI_I2C_CNTL_1, 0);
2631     WREG32(RADEON_CAP0_TRIG_CNTL, 0);
2632     WREG32(RADEON_CAP1_TRIG_CNTL, 0);
2633 
2634     /* always set up dac2 on rn50 and some rv100 as lots
2635      * of servers seem to wire it up to a VGA port but
2636      * don't report it in the bios connector
2637      * table.
2638      */
2639     switch (rdev->pdev->device) {
2640         /* RN50 */
2641     case 0x515e:
2642     case 0x5969:
2643         force_dac2 = true;
2644         break;
2645         /* RV100*/
2646     case 0x5159:
2647     case 0x515a:
2648         /* DELL triple head servers */
2649         if ((rdev->pdev->subsystem_vendor == 0x1028 /* DELL */) &&
2650             ((rdev->pdev->subsystem_device == 0x016c) ||
2651              (rdev->pdev->subsystem_device == 0x016d) ||
2652              (rdev->pdev->subsystem_device == 0x016e) ||
2653              (rdev->pdev->subsystem_device == 0x016f) ||
2654              (rdev->pdev->subsystem_device == 0x0170) ||
2655              (rdev->pdev->subsystem_device == 0x017d) ||
2656              (rdev->pdev->subsystem_device == 0x017e) ||
2657              (rdev->pdev->subsystem_device == 0x0183) ||
2658              (rdev->pdev->subsystem_device == 0x018a) ||
2659              (rdev->pdev->subsystem_device == 0x019a)))
2660             force_dac2 = true;
2661         break;
2662     }
2663 
2664     if (force_dac2) {
2665         u32 disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG);
2666         u32 tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
2667         u32 dac2_cntl = RREG32(RADEON_DAC_CNTL2);
2668 
2669         /* For CRT on DAC2, don't turn it on if BIOS didn't
2670            enable it, even it's detected.
2671         */
2672 
2673         /* force it to crtc0 */
2674         dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL;
2675         dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL;
2676         disp_hw_debug |= RADEON_CRT2_DISP1_SEL;
2677 
2678         /* set up the TV DAC */
2679         tv_dac_cntl &= ~(RADEON_TV_DAC_PEDESTAL |
2680                  RADEON_TV_DAC_STD_MASK |
2681                  RADEON_TV_DAC_RDACPD |
2682                  RADEON_TV_DAC_GDACPD |
2683                  RADEON_TV_DAC_BDACPD |
2684                  RADEON_TV_DAC_BGADJ_MASK |
2685                  RADEON_TV_DAC_DACADJ_MASK);
2686         tv_dac_cntl |= (RADEON_TV_DAC_NBLANK |
2687                 RADEON_TV_DAC_NHOLD |
2688                 RADEON_TV_DAC_STD_PS2 |
2689                 (0x58 << 16));
2690 
2691         WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
2692         WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug);
2693         WREG32(RADEON_DAC_CNTL2, dac2_cntl);
2694     }
2695 
2696     /* switch PM block to ACPI mode */
2697     tmp = RREG32_PLL(RADEON_PLL_PWRMGT_CNTL);
2698     tmp &= ~RADEON_PM_MODE_SEL;
2699     WREG32_PLL(RADEON_PLL_PWRMGT_CNTL, tmp);
2700 
2701 }
2702 
2703 /*
2704  * VRAM info
2705  */
2706 static void r100_vram_get_type(struct radeon_device *rdev)
2707 {
2708     uint32_t tmp;
2709 
2710     rdev->mc.vram_is_ddr = false;
2711     if (rdev->flags & RADEON_IS_IGP)
2712         rdev->mc.vram_is_ddr = true;
2713     else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
2714         rdev->mc.vram_is_ddr = true;
2715     if ((rdev->family == CHIP_RV100) ||
2716         (rdev->family == CHIP_RS100) ||
2717         (rdev->family == CHIP_RS200)) {
2718         tmp = RREG32(RADEON_MEM_CNTL);
2719         if (tmp & RV100_HALF_MODE) {
2720             rdev->mc.vram_width = 32;
2721         } else {
2722             rdev->mc.vram_width = 64;
2723         }
2724         if (rdev->flags & RADEON_SINGLE_CRTC) {
2725             rdev->mc.vram_width /= 4;
2726             rdev->mc.vram_is_ddr = true;
2727         }
2728     } else if (rdev->family <= CHIP_RV280) {
2729         tmp = RREG32(RADEON_MEM_CNTL);
2730         if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
2731             rdev->mc.vram_width = 128;
2732         } else {
2733             rdev->mc.vram_width = 64;
2734         }
2735     } else {
2736         /* newer IGPs */
2737         rdev->mc.vram_width = 128;
2738     }
2739 }
2740 
2741 static u32 r100_get_accessible_vram(struct radeon_device *rdev)
2742 {
2743     u32 aper_size;
2744     u8 byte;
2745 
2746     aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
2747 
2748     /* Set HDP_APER_CNTL only on cards that are known not to be broken,
2749      * that is has the 2nd generation multifunction PCI interface
2750      */
2751     if (rdev->family == CHIP_RV280 ||
2752         rdev->family >= CHIP_RV350) {
2753         WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL,
2754                ~RADEON_HDP_APER_CNTL);
2755         DRM_INFO("Generation 2 PCI interface, using max accessible memory\n");
2756         return aper_size * 2;
2757     }
2758 
2759     /* Older cards have all sorts of funny issues to deal with. First
2760      * check if it's a multifunction card by reading the PCI config
2761      * header type... Limit those to one aperture size
2762      */
2763     pci_read_config_byte(rdev->pdev, 0xe, &byte);
2764     if (byte & 0x80) {
2765         DRM_INFO("Generation 1 PCI interface in multifunction mode\n");
2766         DRM_INFO("Limiting VRAM to one aperture\n");
2767         return aper_size;
2768     }
2769 
2770     /* Single function older card. We read HDP_APER_CNTL to see how the BIOS
2771      * have set it up. We don't write this as it's broken on some ASICs but
2772      * we expect the BIOS to have done the right thing (might be too optimistic...)
2773      */
2774     if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL)
2775         return aper_size * 2;
2776     return aper_size;
2777 }
2778 
2779 void r100_vram_init_sizes(struct radeon_device *rdev)
2780 {
2781     u64 config_aper_size;
2782 
2783     /* work out accessible VRAM */
2784     rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
2785     rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
2786     rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev);
2787     /* FIXME we don't use the second aperture yet when we could use it */
2788     if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
2789         rdev->mc.visible_vram_size = rdev->mc.aper_size;
2790     config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
2791     if (rdev->flags & RADEON_IS_IGP) {
2792         uint32_t tom;
2793         /* read NB_TOM to get the amount of ram stolen for the GPU */
2794         tom = RREG32(RADEON_NB_TOM);
2795         rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
2796         WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
2797         rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
2798     } else {
2799         rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
2800         /* Some production boards of m6 will report 0
2801          * if it's 8 MB
2802          */
2803         if (rdev->mc.real_vram_size == 0) {
2804             rdev->mc.real_vram_size = 8192 * 1024;
2805             WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
2806         }
2807         /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM -
2808          * Novell bug 204882 + along with lots of ubuntu ones
2809          */
2810         if (rdev->mc.aper_size > config_aper_size)
2811             config_aper_size = rdev->mc.aper_size;
2812 
2813         if (config_aper_size > rdev->mc.real_vram_size)
2814             rdev->mc.mc_vram_size = config_aper_size;
2815         else
2816             rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
2817     }
2818 }
2819 
2820 void r100_vga_set_state(struct radeon_device *rdev, bool state)
2821 {
2822     uint32_t temp;
2823 
2824     temp = RREG32(RADEON_CONFIG_CNTL);
2825     if (!state) {
2826         temp &= ~RADEON_CFG_VGA_RAM_EN;
2827         temp |= RADEON_CFG_VGA_IO_DIS;
2828     } else {
2829         temp &= ~RADEON_CFG_VGA_IO_DIS;
2830     }
2831     WREG32(RADEON_CONFIG_CNTL, temp);
2832 }
2833 
2834 static void r100_mc_init(struct radeon_device *rdev)
2835 {
2836     u64 base;
2837 
2838     r100_vram_get_type(rdev);
2839     r100_vram_init_sizes(rdev);
2840     base = rdev->mc.aper_base;
2841     if (rdev->flags & RADEON_IS_IGP)
2842         base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
2843     radeon_vram_location(rdev, &rdev->mc, base);
2844     rdev->mc.gtt_base_align = 0;
2845     if (!(rdev->flags & RADEON_IS_AGP))
2846         radeon_gtt_location(rdev, &rdev->mc);
2847     radeon_update_bandwidth_info(rdev);
2848 }
2849 
2850 
2851 /*
2852  * Indirect registers accessor
2853  */
2854 void r100_pll_errata_after_index(struct radeon_device *rdev)
2855 {
2856     if (rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS) {
2857         (void)RREG32(RADEON_CLOCK_CNTL_DATA);
2858         (void)RREG32(RADEON_CRTC_GEN_CNTL);
2859     }
2860 }
2861 
2862 static void r100_pll_errata_after_data(struct radeon_device *rdev)
2863 {
2864     /* This workarounds is necessary on RV100, RS100 and RS200 chips
2865      * or the chip could hang on a subsequent access
2866      */
2867     if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
2868         mdelay(5);
2869     }
2870 
2871     /* This function is required to workaround a hardware bug in some (all?)
2872      * revisions of the R300.  This workaround should be called after every
2873      * CLOCK_CNTL_INDEX register access.  If not, register reads afterward
2874      * may not be correct.
2875      */
2876     if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
2877         uint32_t save, tmp;
2878 
2879         save = RREG32(RADEON_CLOCK_CNTL_INDEX);
2880         tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
2881         WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
2882         tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
2883         WREG32(RADEON_CLOCK_CNTL_INDEX, save);
2884     }
2885 }
2886 
2887 uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
2888 {
2889     unsigned long flags;
2890     uint32_t data;
2891 
2892     spin_lock_irqsave(&rdev->pll_idx_lock, flags);
2893     WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
2894     r100_pll_errata_after_index(rdev);
2895     data = RREG32(RADEON_CLOCK_CNTL_DATA);
2896     r100_pll_errata_after_data(rdev);
2897     spin_unlock_irqrestore(&rdev->pll_idx_lock, flags);
2898     return data;
2899 }
2900 
2901 void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
2902 {
2903     unsigned long flags;
2904 
2905     spin_lock_irqsave(&rdev->pll_idx_lock, flags);
2906     WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
2907     r100_pll_errata_after_index(rdev);
2908     WREG32(RADEON_CLOCK_CNTL_DATA, v);
2909     r100_pll_errata_after_data(rdev);
2910     spin_unlock_irqrestore(&rdev->pll_idx_lock, flags);
2911 }
2912 
2913 static void r100_set_safe_registers(struct radeon_device *rdev)
2914 {
2915     if (ASIC_IS_RN50(rdev)) {
2916         rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm;
2917         rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm);
2918     } else if (rdev->family < CHIP_R200) {
2919         rdev->config.r100.reg_safe_bm = r100_reg_safe_bm;
2920         rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm);
2921     } else {
2922         r200_set_safe_registers(rdev);
2923     }
2924 }
2925 
2926 /*
2927  * Debugfs info
2928  */
2929 #if defined(CONFIG_DEBUG_FS)
2930 static int r100_debugfs_rbbm_info_show(struct seq_file *m, void *unused)
2931 {
2932     struct radeon_device *rdev = (struct radeon_device *)m->private;
2933     uint32_t reg, value;
2934     unsigned i;
2935 
2936     seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
2937     seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
2938     seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2939     for (i = 0; i < 64; i++) {
2940         WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
2941         reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
2942         WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
2943         value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
2944         seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
2945     }
2946     return 0;
2947 }
2948 
2949 static int r100_debugfs_cp_ring_info_show(struct seq_file *m, void *unused)
2950 {
2951     struct radeon_device *rdev = (struct radeon_device *)m->private;
2952     struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2953     uint32_t rdp, wdp;
2954     unsigned count, i, j;
2955 
2956     radeon_ring_free_size(rdev, ring);
2957     rdp = RREG32(RADEON_CP_RB_RPTR);
2958     wdp = RREG32(RADEON_CP_RB_WPTR);
2959     count = (rdp + ring->ring_size - wdp) & ring->ptr_mask;
2960     seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2961     seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
2962     seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
2963     seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
2964     seq_printf(m, "%u dwords in ring\n", count);
2965     if (ring->ready) {
2966         for (j = 0; j <= count; j++) {
2967             i = (rdp + j) & ring->ptr_mask;
2968             seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]);
2969         }
2970     }
2971     return 0;
2972 }
2973 
2974 
2975 static int r100_debugfs_cp_csq_fifo_show(struct seq_file *m, void *unused)
2976 {
2977     struct radeon_device *rdev = (struct radeon_device *)m->private;
2978     uint32_t csq_stat, csq2_stat, tmp;
2979     unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
2980     unsigned i;
2981 
2982     seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2983     seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
2984     csq_stat = RREG32(RADEON_CP_CSQ_STAT);
2985     csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
2986     r_rptr = (csq_stat >> 0) & 0x3ff;
2987     r_wptr = (csq_stat >> 10) & 0x3ff;
2988     ib1_rptr = (csq_stat >> 20) & 0x3ff;
2989     ib1_wptr = (csq2_stat >> 0) & 0x3ff;
2990     ib2_rptr = (csq2_stat >> 10) & 0x3ff;
2991     ib2_wptr = (csq2_stat >> 20) & 0x3ff;
2992     seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
2993     seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
2994     seq_printf(m, "Ring rptr %u\n", r_rptr);
2995     seq_printf(m, "Ring wptr %u\n", r_wptr);
2996     seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
2997     seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
2998     seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
2999     seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
3000     /* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms
3001      * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */
3002     seq_printf(m, "Ring fifo:\n");
3003     for (i = 0; i < 256; i++) {
3004         WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3005         tmp = RREG32(RADEON_CP_CSQ_DATA);
3006         seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
3007     }
3008     seq_printf(m, "Indirect1 fifo:\n");
3009     for (i = 256; i <= 512; i++) {
3010         WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3011         tmp = RREG32(RADEON_CP_CSQ_DATA);
3012         seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
3013     }
3014     seq_printf(m, "Indirect2 fifo:\n");
3015     for (i = 640; i < ib1_wptr; i++) {
3016         WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3017         tmp = RREG32(RADEON_CP_CSQ_DATA);
3018         seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
3019     }
3020     return 0;
3021 }
3022 
3023 static int r100_debugfs_mc_info_show(struct seq_file *m, void *unused)
3024 {
3025     struct radeon_device *rdev = (struct radeon_device *)m->private;
3026     uint32_t tmp;
3027 
3028     tmp = RREG32(RADEON_CONFIG_MEMSIZE);
3029     seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
3030     tmp = RREG32(RADEON_MC_FB_LOCATION);
3031     seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
3032     tmp = RREG32(RADEON_BUS_CNTL);
3033     seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
3034     tmp = RREG32(RADEON_MC_AGP_LOCATION);
3035     seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
3036     tmp = RREG32(RADEON_AGP_BASE);
3037     seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
3038     tmp = RREG32(RADEON_HOST_PATH_CNTL);
3039     seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
3040     tmp = RREG32(0x01D0);
3041     seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
3042     tmp = RREG32(RADEON_AIC_LO_ADDR);
3043     seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
3044     tmp = RREG32(RADEON_AIC_HI_ADDR);
3045     seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
3046     tmp = RREG32(0x01E4);
3047     seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
3048     return 0;
3049 }
3050 
3051 DEFINE_SHOW_ATTRIBUTE(r100_debugfs_rbbm_info);
3052 DEFINE_SHOW_ATTRIBUTE(r100_debugfs_cp_ring_info);
3053 DEFINE_SHOW_ATTRIBUTE(r100_debugfs_cp_csq_fifo);
3054 DEFINE_SHOW_ATTRIBUTE(r100_debugfs_mc_info);
3055 
3056 #endif
3057 
/**
 * r100_debugfs_rbbm_init - create the "r100_rbbm_info" debugfs file
 * @rdev: radeon device, passed as the file's private data
 *
 * Compiles to an empty function when CONFIG_DEBUG_FS is not defined.
 */
void r100_debugfs_rbbm_init(struct radeon_device *rdev)
{
#ifdef CONFIG_DEBUG_FS
    debugfs_create_file("r100_rbbm_info", 0444,
                        rdev->ddev->primary->debugfs_root, rdev,
                        &r100_debugfs_rbbm_info_fops);
#endif
}
3067 
/**
 * r100_debugfs_cp_init - create the CP debugfs files
 * @rdev: radeon device, passed as the files' private data
 *
 * Creates "r100_cp_ring_info" and "r100_cp_csq_fifo".  Compiles to an
 * empty function when CONFIG_DEBUG_FS is not defined.
 */
void r100_debugfs_cp_init(struct radeon_device *rdev)
{
#ifdef CONFIG_DEBUG_FS
    struct dentry *dir = rdev->ddev->primary->debugfs_root;

    debugfs_create_file("r100_cp_ring_info", 0444, dir, rdev,
                        &r100_debugfs_cp_ring_info_fops);
    debugfs_create_file("r100_cp_csq_fifo", 0444, dir, rdev,
                        &r100_debugfs_cp_csq_fifo_fops);
#endif
}
3079 
/**
 * r100_debugfs_mc_info_init - create the "r100_mc_info" debugfs file
 * @rdev: radeon device, passed as the file's private data
 *
 * Compiles to an empty function when CONFIG_DEBUG_FS is not defined.
 */
void r100_debugfs_mc_info_init(struct radeon_device *rdev)
{
#ifdef CONFIG_DEBUG_FS
    debugfs_create_file("r100_mc_info", 0444,
                        rdev->ddev->primary->debugfs_root, rdev,
                        &r100_debugfs_mc_info_fops);
#endif
}
3089 
3090 int r100_set_surface_reg(struct radeon_device *rdev, int reg,
3091              uint32_t tiling_flags, uint32_t pitch,
3092              uint32_t offset, uint32_t obj_size)
3093 {
3094     int surf_index = reg * 16;
3095     int flags = 0;
3096 
3097     if (rdev->family <= CHIP_RS200) {
3098         if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3099                  == (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3100             flags |= RADEON_SURF_TILE_COLOR_BOTH;
3101         if (tiling_flags & RADEON_TILING_MACRO)
3102             flags |= RADEON_SURF_TILE_COLOR_MACRO;
3103         /* setting pitch to 0 disables tiling */
3104         if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3105                 == 0)
3106             pitch = 0;
3107     } else if (rdev->family <= CHIP_RV280) {
3108         if (tiling_flags & (RADEON_TILING_MACRO))
3109             flags |= R200_SURF_TILE_COLOR_MACRO;
3110         if (tiling_flags & RADEON_TILING_MICRO)
3111             flags |= R200_SURF_TILE_COLOR_MICRO;
3112     } else {
3113         if (tiling_flags & RADEON_TILING_MACRO)
3114             flags |= R300_SURF_TILE_MACRO;
3115         if (tiling_flags & RADEON_TILING_MICRO)
3116             flags |= R300_SURF_TILE_MICRO;
3117     }
3118 
3119     if (tiling_flags & RADEON_TILING_SWAP_16BIT)
3120         flags |= RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP;
3121     if (tiling_flags & RADEON_TILING_SWAP_32BIT)
3122         flags |= RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP;
3123 
3124     /* r100/r200 divide by 16 */
3125     if (rdev->family < CHIP_R300)
3126         flags |= pitch / 16;
3127     else
3128         flags |= pitch / 8;
3129 
3130 
3131     DRM_DEBUG_KMS("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1);
3132     WREG32(RADEON_SURFACE0_INFO + surf_index, flags);
3133     WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset);
3134     WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1);
3135     return 0;
3136 }
3137 
3138 void r100_clear_surface_reg(struct radeon_device *rdev, int reg)
3139 {
3140     int surf_index = reg * 16;
3141     WREG32(RADEON_SURFACE0_INFO + surf_index, 0);
3142 }
3143 
3144 void r100_bandwidth_update(struct radeon_device *rdev)
3145 {
3146     fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff;
3147     fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff;
3148     fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff;
3149     fixed20_12 crit_point_ff = {0};
3150     uint32_t temp, data, mem_trcd, mem_trp, mem_tras;
3151     fixed20_12 memtcas_ff[8] = {
3152         dfixed_init(1),
3153         dfixed_init(2),
3154         dfixed_init(3),
3155         dfixed_init(0),
3156         dfixed_init_half(1),
3157         dfixed_init_half(2),
3158         dfixed_init(0),
3159     };
3160     fixed20_12 memtcas_rs480_ff[8] = {
3161         dfixed_init(0),
3162         dfixed_init(1),
3163         dfixed_init(2),
3164         dfixed_init(3),
3165         dfixed_init(0),
3166         dfixed_init_half(1),
3167         dfixed_init_half(2),
3168         dfixed_init_half(3),
3169     };
3170     fixed20_12 memtcas2_ff[8] = {
3171         dfixed_init(0),
3172         dfixed_init(1),
3173         dfixed_init(2),
3174         dfixed_init(3),
3175         dfixed_init(4),
3176         dfixed_init(5),
3177         dfixed_init(6),
3178         dfixed_init(7),
3179     };
3180     fixed20_12 memtrbs[8] = {
3181         dfixed_init(1),
3182         dfixed_init_half(1),
3183         dfixed_init(2),
3184         dfixed_init_half(2),
3185         dfixed_init(3),
3186         dfixed_init_half(3),
3187         dfixed_init(4),
3188         dfixed_init_half(4)
3189     };
3190     fixed20_12 memtrbs_r4xx[8] = {
3191         dfixed_init(4),
3192         dfixed_init(5),
3193         dfixed_init(6),
3194         dfixed_init(7),
3195         dfixed_init(8),
3196         dfixed_init(9),
3197         dfixed_init(10),
3198         dfixed_init(11)
3199     };
3200     fixed20_12 min_mem_eff;
3201     fixed20_12 mc_latency_sclk, mc_latency_mclk, k1;
3202     fixed20_12 cur_latency_mclk, cur_latency_sclk;
3203     fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate = {0},
3204         disp_drain_rate2, read_return_rate;
3205     fixed20_12 time_disp1_drop_priority;
3206     int c;
3207     int cur_size = 16;       /* in octawords */
3208     int critical_point = 0, critical_point2;
3209 /*  uint32_t read_return_rate, time_disp1_drop_priority; */
3210     int stop_req, max_stop_req;
3211     struct drm_display_mode *mode1 = NULL;
3212     struct drm_display_mode *mode2 = NULL;
3213     uint32_t pixel_bytes1 = 0;
3214     uint32_t pixel_bytes2 = 0;
3215 
3216     /* Guess line buffer size to be 8192 pixels */
3217     u32 lb_size = 8192;
3218 
3219     if (!rdev->mode_info.mode_config_initialized)
3220         return;
3221 
3222     radeon_update_display_priority(rdev);
3223 
3224     if (rdev->mode_info.crtcs[0]->base.enabled) {
3225         const struct drm_framebuffer *fb =
3226             rdev->mode_info.crtcs[0]->base.primary->fb;
3227 
3228         mode1 = &rdev->mode_info.crtcs[0]->base.mode;
3229         pixel_bytes1 = fb->format->cpp[0];
3230     }
3231     if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3232         if (rdev->mode_info.crtcs[1]->base.enabled) {
3233             const struct drm_framebuffer *fb =
3234                 rdev->mode_info.crtcs[1]->base.primary->fb;
3235 
3236             mode2 = &rdev->mode_info.crtcs[1]->base.mode;
3237             pixel_bytes2 = fb->format->cpp[0];
3238         }
3239     }
3240 
3241     min_mem_eff.full = dfixed_const_8(0);
3242     /* get modes */
3243     if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) {
3244         uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER);
3245         mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT);
3246         mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT);
3247         /* check crtc enables */
3248         if (mode2)
3249             mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
3250         if (mode1)
3251             mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
3252         WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer);
3253     }
3254 
3255     /*
3256      * determine is there is enough bw for current mode
3257      */
3258     sclk_ff = rdev->pm.sclk;
3259     mclk_ff = rdev->pm.mclk;
3260 
3261     temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
3262     temp_ff.full = dfixed_const(temp);
3263     mem_bw.full = dfixed_mul(mclk_ff, temp_ff);
3264 
3265     pix_clk.full = 0;
3266     pix_clk2.full = 0;
3267     peak_disp_bw.full = 0;
3268     if (mode1) {
3269         temp_ff.full = dfixed_const(1000);
3270         pix_clk.full = dfixed_const(mode1->clock); /* convert to fixed point */
3271         pix_clk.full = dfixed_div(pix_clk, temp_ff);
3272         temp_ff.full = dfixed_const(pixel_bytes1);
3273         peak_disp_bw.full += dfixed_mul(pix_clk, temp_ff);
3274     }
3275     if (mode2) {
3276         temp_ff.full = dfixed_const(1000);
3277         pix_clk2.full = dfixed_const(mode2->clock); /* convert to fixed point */
3278         pix_clk2.full = dfixed_div(pix_clk2, temp_ff);
3279         temp_ff.full = dfixed_const(pixel_bytes2);
3280         peak_disp_bw.full += dfixed_mul(pix_clk2, temp_ff);
3281     }
3282 
3283     mem_bw.full = dfixed_mul(mem_bw, min_mem_eff);
3284     if (peak_disp_bw.full >= mem_bw.full) {
3285         DRM_ERROR("You may not have enough display bandwidth for current mode\n"
3286               "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n");
3287     }
3288 
3289     /*  Get values from the EXT_MEM_CNTL register...converting its contents. */
3290     temp = RREG32(RADEON_MEM_TIMING_CNTL);
3291     if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */
3292         mem_trcd = ((temp >> 2) & 0x3) + 1;
3293         mem_trp  = ((temp & 0x3)) + 1;
3294         mem_tras = ((temp & 0x70) >> 4) + 1;
3295     } else if (rdev->family == CHIP_R300 ||
3296            rdev->family == CHIP_R350) { /* r300, r350 */
3297         mem_trcd = (temp & 0x7) + 1;
3298         mem_trp = ((temp >> 8) & 0x7) + 1;
3299         mem_tras = ((temp >> 11) & 0xf) + 4;
3300     } else if (rdev->family == CHIP_RV350 ||
3301            rdev->family == CHIP_RV380) {
3302         /* rv3x0 */
3303         mem_trcd = (temp & 0x7) + 3;
3304         mem_trp = ((temp >> 8) & 0x7) + 3;
3305         mem_tras = ((temp >> 11) & 0xf) + 6;
3306     } else if (rdev->family == CHIP_R420 ||
3307            rdev->family == CHIP_R423 ||
3308            rdev->family == CHIP_RV410) {
3309         /* r4xx */
3310         mem_trcd = (temp & 0xf) + 3;
3311         if (mem_trcd > 15)
3312             mem_trcd = 15;
3313         mem_trp = ((temp >> 8) & 0xf) + 3;
3314         if (mem_trp > 15)
3315             mem_trp = 15;
3316         mem_tras = ((temp >> 12) & 0x1f) + 6;
3317         if (mem_tras > 31)
3318             mem_tras = 31;
3319     } else { /* RV200, R200 */
3320         mem_trcd = (temp & 0x7) + 1;
3321         mem_trp = ((temp >> 8) & 0x7) + 1;
3322         mem_tras = ((temp >> 12) & 0xf) + 4;
3323     }
3324     /* convert to FF */
3325     trcd_ff.full = dfixed_const(mem_trcd);
3326     trp_ff.full = dfixed_const(mem_trp);
3327     tras_ff.full = dfixed_const(mem_tras);
3328 
3329     /* Get values from the MEM_SDRAM_MODE_REG register...converting its */
3330     temp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
3331     data = (temp & (7 << 20)) >> 20;
3332     if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) {
3333         if (rdev->family == CHIP_RS480) /* don't think rs400 */
3334             tcas_ff = memtcas_rs480_ff[data];
3335         else
3336             tcas_ff = memtcas_ff[data];
3337     } else
3338         tcas_ff = memtcas2_ff[data];
3339 
3340     if (rdev->family == CHIP_RS400 ||
3341         rdev->family == CHIP_RS480) {
3342         /* extra cas latency stored in bits 23-25 0-4 clocks */
3343         data = (temp >> 23) & 0x7;
3344         if (data < 5)
3345             tcas_ff.full += dfixed_const(data);
3346     }
3347 
3348     if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
3349         /* on the R300, Tcas is included in Trbs.
3350          */
3351         temp = RREG32(RADEON_MEM_CNTL);
3352         data = (R300_MEM_NUM_CHANNELS_MASK & temp);
3353         if (data == 1) {
3354             if (R300_MEM_USE_CD_CH_ONLY & temp) {
3355                 temp = RREG32(R300_MC_IND_INDEX);
3356                 temp &= ~R300_MC_IND_ADDR_MASK;
3357                 temp |= R300_MC_READ_CNTL_CD_mcind;
3358                 WREG32(R300_MC_IND_INDEX, temp);
3359                 temp = RREG32(R300_MC_IND_DATA);
3360                 data = (R300_MEM_RBS_POSITION_C_MASK & temp);
3361             } else {
3362                 temp = RREG32(R300_MC_READ_CNTL_AB);
3363                 data = (R300_MEM_RBS_POSITION_A_MASK & temp);
3364             }
3365         } else {
3366             temp = RREG32(R300_MC_READ_CNTL_AB);
3367             data = (R300_MEM_RBS_POSITION_A_MASK & temp);
3368         }
3369         if (rdev->family == CHIP_RV410 ||
3370             rdev->family == CHIP_R420 ||
3371             rdev->family == CHIP_R423)
3372             trbs_ff = memtrbs_r4xx[data];
3373         else
3374             trbs_ff = memtrbs[data];
3375         tcas_ff.full += trbs_ff.full;
3376     }
3377 
3378     sclk_eff_ff.full = sclk_ff.full;
3379 
3380     if (rdev->flags & RADEON_IS_AGP) {
3381         fixed20_12 agpmode_ff;
3382         agpmode_ff.full = dfixed_const(radeon_agpmode);
3383         temp_ff.full = dfixed_const_666(16);
3384         sclk_eff_ff.full -= dfixed_mul(agpmode_ff, temp_ff);
3385     }
3386     /* TODO PCIE lanes may affect this - agpmode == 16?? */
3387 
3388     if (ASIC_IS_R300(rdev)) {
3389         sclk_delay_ff.full = dfixed_const(250);
3390     } else {
3391         if ((rdev->family == CHIP_RV100) ||
3392             rdev->flags & RADEON_IS_IGP) {
3393             if (rdev->mc.vram_is_ddr)
3394                 sclk_delay_ff.full = dfixed_const(41);
3395             else
3396                 sclk_delay_ff.full = dfixed_const(33);
3397         } else {
3398             if (rdev->mc.vram_width == 128)
3399                 sclk_delay_ff.full = dfixed_const(57);
3400             else
3401                 sclk_delay_ff.full = dfixed_const(41);
3402         }
3403     }
3404 
3405     mc_latency_sclk.full = dfixed_div(sclk_delay_ff, sclk_eff_ff);
3406 
3407     if (rdev->mc.vram_is_ddr) {
3408         if (rdev->mc.vram_width == 32) {
3409             k1.full = dfixed_const(40);
3410             c  = 3;
3411         } else {
3412             k1.full = dfixed_const(20);
3413             c  = 1;
3414         }
3415     } else {
3416         k1.full = dfixed_const(40);
3417         c  = 3;
3418     }
3419 
3420     temp_ff.full = dfixed_const(2);
3421     mc_latency_mclk.full = dfixed_mul(trcd_ff, temp_ff);
3422     temp_ff.full = dfixed_const(c);
3423     mc_latency_mclk.full += dfixed_mul(tcas_ff, temp_ff);
3424     temp_ff.full = dfixed_const(4);
3425     mc_latency_mclk.full += dfixed_mul(tras_ff, temp_ff);
3426     mc_latency_mclk.full += dfixed_mul(trp_ff, temp_ff);
3427     mc_latency_mclk.full += k1.full;
3428 
3429     mc_latency_mclk.full = dfixed_div(mc_latency_mclk, mclk_ff);
3430     mc_latency_mclk.full += dfixed_div(temp_ff, sclk_eff_ff);
3431 
3432     /*
3433       HW cursor time assuming worst case of full size colour cursor.
3434     */
3435     temp_ff.full = dfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1))));
3436     temp_ff.full += trcd_ff.full;
3437     if (temp_ff.full < tras_ff.full)
3438         temp_ff.full = tras_ff.full;
3439     cur_latency_mclk.full = dfixed_div(temp_ff, mclk_ff);
3440 
3441     temp_ff.full = dfixed_const(cur_size);
3442     cur_latency_sclk.full = dfixed_div(temp_ff, sclk_eff_ff);
3443     /*
3444       Find the total latency for the display data.
3445     */
3446     disp_latency_overhead.full = dfixed_const(8);
3447     disp_latency_overhead.full = dfixed_div(disp_latency_overhead, sclk_ff);
3448     mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full;
3449     mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full;
3450 
3451     if (mc_latency_mclk.full > mc_latency_sclk.full)
3452         disp_latency.full = mc_latency_mclk.full;
3453     else
3454         disp_latency.full = mc_latency_sclk.full;
3455 
3456     /* setup Max GRPH_STOP_REQ default value */
3457     if (ASIC_IS_RV100(rdev))
3458         max_stop_req = 0x5c;
3459     else
3460         max_stop_req = 0x7c;
3461 
3462     if (mode1) {
3463         /*  CRTC1
3464             Set GRPH_BUFFER_CNTL register using h/w defined optimal values.
3465             GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
3466         */
3467         stop_req = mode1->hdisplay * pixel_bytes1 / 16;
3468 
3469         if (stop_req > max_stop_req)
3470             stop_req = max_stop_req;
3471 
3472         /*
3473           Find the drain rate of the display buffer.
3474         */
3475         temp_ff.full = dfixed_const((16/pixel_bytes1));
3476         disp_drain_rate.full = dfixed_div(pix_clk, temp_ff);
3477 
3478         /*
3479           Find the critical point of the display buffer.
3480         */
3481         crit_point_ff.full = dfixed_mul(disp_drain_rate, disp_latency);
3482         crit_point_ff.full += dfixed_const_half(0);
3483 
3484         critical_point = dfixed_trunc(crit_point_ff);
3485 
3486         if (rdev->disp_priority == 2) {
3487             critical_point = 0;
3488         }
3489 
3490         /*
3491           The critical point should never be above max_stop_req-4.  Setting
3492           GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time.
3493         */
3494         if (max_stop_req - critical_point < 4)
3495             critical_point = 0;
3496 
3497         if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) {
3498             /* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/
3499             critical_point = 0x10;
3500         }
3501 
3502         temp = RREG32(RADEON_GRPH_BUFFER_CNTL);
3503         temp &= ~(RADEON_GRPH_STOP_REQ_MASK);
3504         temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
3505         temp &= ~(RADEON_GRPH_START_REQ_MASK);
3506         if ((rdev->family == CHIP_R350) &&
3507             (stop_req > 0x15)) {
3508             stop_req -= 0x10;
3509         }
3510         temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
3511         temp |= RADEON_GRPH_BUFFER_SIZE;
3512         temp &= ~(RADEON_GRPH_CRITICAL_CNTL   |
3513               RADEON_GRPH_CRITICAL_AT_SOF |
3514               RADEON_GRPH_STOP_CNTL);
3515         /*
3516           Write the result into the register.
3517         */
3518         WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
3519                                (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
3520 
3521 #if 0
3522         if ((rdev->family == CHIP_RS400) ||
3523             (rdev->family == CHIP_RS480)) {
3524             /* attempt to program RS400 disp regs correctly ??? */
3525             temp = RREG32(RS400_DISP1_REG_CNTL);
3526             temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK |
3527                   RS400_DISP1_STOP_REQ_LEVEL_MASK);
3528             WREG32(RS400_DISP1_REQ_CNTL1, (temp |
3529                                (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
3530                                (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
3531             temp = RREG32(RS400_DMIF_MEM_CNTL1);
3532             temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK |
3533                   RS400_DISP1_CRITICAL_POINT_STOP_MASK);
3534             WREG32(RS400_DMIF_MEM_CNTL1, (temp |
3535                               (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) |
3536                               (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT)));
3537         }
3538 #endif
3539 
3540         DRM_DEBUG_KMS("GRPH_BUFFER_CNTL from to %x\n",
3541               /*      (unsigned int)info->SavedReg->grph_buffer_cntl, */
3542               (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL));
3543     }
3544 
3545     if (mode2) {
3546         u32 grph2_cntl;
3547         stop_req = mode2->hdisplay * pixel_bytes2 / 16;
3548 
3549         if (stop_req > max_stop_req)
3550             stop_req = max_stop_req;
3551 
3552         /*
3553           Find the drain rate of the display buffer.
3554         */
3555         temp_ff.full = dfixed_const((16/pixel_bytes2));
3556         disp_drain_rate2.full = dfixed_div(pix_clk2, temp_ff);
3557 
3558         grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL);
3559         grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK);
3560         grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
3561         grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK);
3562         if ((rdev->family == CHIP_R350) &&
3563             (stop_req > 0x15)) {
3564             stop_req -= 0x10;
3565         }
3566         grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
3567         grph2_cntl |= RADEON_GRPH_BUFFER_SIZE;
3568         grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL   |
3569               RADEON_GRPH_CRITICAL_AT_SOF |
3570               RADEON_GRPH_STOP_CNTL);
3571 
3572         if ((rdev->family == CHIP_RS100) ||
3573             (rdev->family == CHIP_RS200))
3574             critical_point2 = 0;
3575         else {
3576             temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128;
3577             temp_ff.full = dfixed_const(temp);
3578             temp_ff.full = dfixed_mul(mclk_ff, temp_ff);
3579             if (sclk_ff.full < temp_ff.full)
3580                 temp_ff.full = sclk_ff.full;
3581 
3582             read_return_rate.full = temp_ff.full;
3583 
3584             if (mode1) {
3585                 temp_ff.full = read_return_rate.full - disp_drain_rate.full;
3586                 time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff);
3587             } else {
3588                 time_disp1_drop_priority.full = 0;
3589             }
3590             crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full;
3591             crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2);
3592             crit_point_ff.full += dfixed_const_half(0);
3593 
3594             critical_point2 = dfixed_trunc(crit_point_ff);
3595 
3596             if (rdev->disp_priority == 2) {
3597                 critical_point2 = 0;
3598             }
3599 
3600             if (max_stop_req - critical_point2 < 4)
3601                 critical_point2 = 0;
3602 
3603         }
3604 
3605         if (critical_point2 == 0 && rdev->family == CHIP_R300) {
3606             /* some R300 cards have problem with this set to 0 */
3607             critical_point2 = 0x10;
3608         }
3609 
3610         WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
3611                           (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
3612 
3613         if ((rdev->family == CHIP_RS400) ||
3614             (rdev->family == CHIP_RS480)) {
3615 #if 0
3616             /* attempt to program RS400 disp2 regs correctly ??? */
3617             temp = RREG32(RS400_DISP2_REQ_CNTL1);
3618             temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK |
3619                   RS400_DISP2_STOP_REQ_LEVEL_MASK);
3620             WREG32(RS400_DISP2_REQ_CNTL1, (temp |
3621                                (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
3622                                (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
3623             temp = RREG32(RS400_DISP2_REQ_CNTL2);
3624             temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK |
3625                   RS400_DISP2_CRITICAL_POINT_STOP_MASK);
3626             WREG32(RS400_DISP2_REQ_CNTL2, (temp |
3627                                (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) |
3628                                (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT)));
3629 #endif
3630             WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC);
3631             WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000);
3632             WREG32(RS400_DMIF_MEM_CNTL1,  0x29CA71DC);
3633             WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC);
3634         }
3635 
3636         DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL from to %x\n",
3637               (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
3638     }
3639 
3640     /* Save number of lines the linebuffer leads before the scanout */
3641     if (mode1)
3642         rdev->mode_info.crtcs[0]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode1->crtc_hdisplay);
3643 
3644     if (mode2)
3645         rdev->mode_info.crtcs[1]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode2->crtc_hdisplay);
3646 }
3647 
3648 int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3649 {
3650     uint32_t scratch;
3651     uint32_t tmp = 0;
3652     unsigned i;
3653     int r;
3654 
3655     r = radeon_scratch_get(rdev, &scratch);
3656     if (r) {
3657         DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3658         return r;
3659     }
3660     WREG32(scratch, 0xCAFEDEAD);
3661     r = radeon_ring_lock(rdev, ring, 2);
3662     if (r) {
3663         DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3664         radeon_scratch_free(rdev, scratch);
3665         return r;
3666     }
3667     radeon_ring_write(ring, PACKET0(scratch, 0));
3668     radeon_ring_write(ring, 0xDEADBEEF);
3669     radeon_ring_unlock_commit(rdev, ring, false);
3670     for (i = 0; i < rdev->usec_timeout; i++) {
3671         tmp = RREG32(scratch);
3672         if (tmp == 0xDEADBEEF) {
3673             break;
3674         }
3675         udelay(1);
3676     }
3677     if (i < rdev->usec_timeout) {
3678         DRM_INFO("ring test succeeded in %d usecs\n", i);
3679     } else {
3680         DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
3681               scratch, tmp);
3682         r = -EINVAL;
3683     }
3684     radeon_scratch_free(rdev, scratch);
3685     return r;
3686 }
3687 
3688 void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3689 {
3690     struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3691 
3692     if (ring->rptr_save_reg) {
3693         u32 next_rptr = ring->wptr + 2 + 3;
3694         radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
3695         radeon_ring_write(ring, next_rptr);
3696     }
3697 
3698     radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1));
3699     radeon_ring_write(ring, ib->gpu_addr);
3700     radeon_ring_write(ring, ib->length_dw);
3701 }
3702 
3703 int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3704 {
3705     struct radeon_ib ib;
3706     uint32_t scratch;
3707     uint32_t tmp = 0;
3708     unsigned i;
3709     int r;
3710 
3711     r = radeon_scratch_get(rdev, &scratch);
3712     if (r) {
3713         DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3714         return r;
3715     }
3716     WREG32(scratch, 0xCAFEDEAD);
3717     r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, NULL, 256);
3718     if (r) {
3719         DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3720         goto free_scratch;
3721     }
3722     ib.ptr[0] = PACKET0(scratch, 0);
3723     ib.ptr[1] = 0xDEADBEEF;
3724     ib.ptr[2] = PACKET2(0);
3725     ib.ptr[3] = PACKET2(0);
3726     ib.ptr[4] = PACKET2(0);
3727     ib.ptr[5] = PACKET2(0);
3728     ib.ptr[6] = PACKET2(0);
3729     ib.ptr[7] = PACKET2(0);
3730     ib.length_dw = 8;
3731     r = radeon_ib_schedule(rdev, &ib, NULL, false);
3732     if (r) {
3733         DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3734         goto free_ib;
3735     }
3736     r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3737         RADEON_USEC_IB_TEST_TIMEOUT));
3738     if (r < 0) {
3739         DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3740         goto free_ib;
3741     } else if (r == 0) {
3742         DRM_ERROR("radeon: fence wait timed out.\n");
3743         r = -ETIMEDOUT;
3744         goto free_ib;
3745     }
3746     r = 0;
3747     for (i = 0; i < rdev->usec_timeout; i++) {
3748         tmp = RREG32(scratch);
3749         if (tmp == 0xDEADBEEF) {
3750             break;
3751         }
3752         udelay(1);
3753     }
3754     if (i < rdev->usec_timeout) {
3755         DRM_INFO("ib test succeeded in %u usecs\n", i);
3756     } else {
3757         DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3758               scratch, tmp);
3759         r = -EINVAL;
3760     }
3761 free_ib:
3762     radeon_ib_free(rdev, &ib);
3763 free_scratch:
3764     radeon_scratch_free(rdev, scratch);
3765     return r;
3766 }
3767 
3768 void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save)
3769 {
3770     /* Shutdown CP we shouldn't need to do that but better be safe than
3771      * sorry
3772      */
3773     rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3774     WREG32(R_000740_CP_CSQ_CNTL, 0);
3775 
3776     /* Save few CRTC registers */
3777     save->GENMO_WT = RREG8(R_0003C2_GENMO_WT);
3778     save->CRTC_EXT_CNTL = RREG32(R_000054_CRTC_EXT_CNTL);
3779     save->CRTC_GEN_CNTL = RREG32(R_000050_CRTC_GEN_CNTL);
3780     save->CUR_OFFSET = RREG32(R_000260_CUR_OFFSET);
3781     if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3782         save->CRTC2_GEN_CNTL = RREG32(R_0003F8_CRTC2_GEN_CNTL);
3783         save->CUR2_OFFSET = RREG32(R_000360_CUR2_OFFSET);
3784     }
3785 
3786     /* Disable VGA aperture access */
3787     WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & save->GENMO_WT);
3788     /* Disable cursor, overlay, crtc */
3789     WREG32(R_000260_CUR_OFFSET, save->CUR_OFFSET | S_000260_CUR_LOCK(1));
3790     WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL |
3791                     S_000054_CRTC_DISPLAY_DIS(1));
3792     WREG32(R_000050_CRTC_GEN_CNTL,
3793             (C_000050_CRTC_CUR_EN & save->CRTC_GEN_CNTL) |
3794             S_000050_CRTC_DISP_REQ_EN_B(1));
3795     WREG32(R_000420_OV0_SCALE_CNTL,
3796         C_000420_OV0_OVERLAY_EN & RREG32(R_000420_OV0_SCALE_CNTL));
3797     WREG32(R_000260_CUR_OFFSET, C_000260_CUR_LOCK & save->CUR_OFFSET);
3798     if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3799         WREG32(R_000360_CUR2_OFFSET, save->CUR2_OFFSET |
3800                         S_000360_CUR2_LOCK(1));
3801         WREG32(R_0003F8_CRTC2_GEN_CNTL,
3802             (C_0003F8_CRTC2_CUR_EN & save->CRTC2_GEN_CNTL) |
3803             S_0003F8_CRTC2_DISPLAY_DIS(1) |
3804             S_0003F8_CRTC2_DISP_REQ_EN_B(1));
3805         WREG32(R_000360_CUR2_OFFSET,
3806             C_000360_CUR2_LOCK & save->CUR2_OFFSET);
3807     }
3808 }
3809 
3810 void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save)
3811 {
3812     /* Update base address for crtc */
3813     WREG32(R_00023C_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
3814     if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3815         WREG32(R_00033C_CRTC2_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
3816     }
3817     /* Restore CRTC registers */
3818     WREG8(R_0003C2_GENMO_WT, save->GENMO_WT);
3819     WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL);
3820     WREG32(R_000050_CRTC_GEN_CNTL, save->CRTC_GEN_CNTL);
3821     if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3822         WREG32(R_0003F8_CRTC2_GEN_CNTL, save->CRTC2_GEN_CNTL);
3823     }
3824 }
3825 
3826 void r100_vga_render_disable(struct radeon_device *rdev)
3827 {
3828     u32 tmp;
3829 
3830     tmp = RREG8(R_0003C2_GENMO_WT);
3831     WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & tmp);
3832 }
3833 
3834 static void r100_mc_program(struct radeon_device *rdev)
3835 {
3836     struct r100_mc_save save;
3837 
3838     /* Stops all mc clients */
3839     r100_mc_stop(rdev, &save);
3840     if (rdev->flags & RADEON_IS_AGP) {
3841         WREG32(R_00014C_MC_AGP_LOCATION,
3842             S_00014C_MC_AGP_START(rdev->mc.gtt_start >> 16) |
3843             S_00014C_MC_AGP_TOP(rdev->mc.gtt_end >> 16));
3844         WREG32(R_000170_AGP_BASE, lower_32_bits(rdev->mc.agp_base));
3845         if (rdev->family > CHIP_RV200)
3846             WREG32(R_00015C_AGP_BASE_2,
3847                 upper_32_bits(rdev->mc.agp_base) & 0xff);
3848     } else {
3849         WREG32(R_00014C_MC_AGP_LOCATION, 0x0FFFFFFF);
3850         WREG32(R_000170_AGP_BASE, 0);
3851         if (rdev->family > CHIP_RV200)
3852             WREG32(R_00015C_AGP_BASE_2, 0);
3853     }
3854     /* Wait for mc idle */
3855     if (r100_mc_wait_for_idle(rdev))
3856         dev_warn(rdev->dev, "Wait for MC idle timeout.\n");
3857     /* Program MC, should be a 32bits limited address space */
3858     WREG32(R_000148_MC_FB_LOCATION,
3859         S_000148_MC_FB_START(rdev->mc.vram_start >> 16) |
3860         S_000148_MC_FB_TOP(rdev->mc.vram_end >> 16));
3861     r100_mc_resume(rdev, &save);
3862 }
3863 
3864 static void r100_clock_startup(struct radeon_device *rdev)
3865 {
3866     u32 tmp;
3867 
3868     if (radeon_dynclks != -1 && radeon_dynclks)
3869         radeon_legacy_set_clock_gating(rdev, 1);
3870     /* We need to force on some of the block */
3871     tmp = RREG32_PLL(R_00000D_SCLK_CNTL);
3872     tmp |= S_00000D_FORCE_CP(1) | S_00000D_FORCE_VIP(1);
3873     if ((rdev->family == CHIP_RV250) || (rdev->family == CHIP_RV280))
3874         tmp |= S_00000D_FORCE_DISP1(1) | S_00000D_FORCE_DISP2(1);
3875     WREG32_PLL(R_00000D_SCLK_CNTL, tmp);
3876 }
3877 
3878 static int r100_startup(struct radeon_device *rdev)
3879 {
3880     int r;
3881 
3882     /* set common regs */
3883     r100_set_common_regs(rdev);
3884     /* program mc */
3885     r100_mc_program(rdev);
3886     /* Resume clock */
3887     r100_clock_startup(rdev);
3888     /* Initialize GART (initialize after TTM so we can allocate
3889      * memory through TTM but finalize after TTM) */
3890     r100_enable_bm(rdev);
3891     if (rdev->flags & RADEON_IS_PCI) {
3892         r = r100_pci_gart_enable(rdev);
3893         if (r)
3894             return r;
3895     }
3896 
3897     /* allocate wb buffer */
3898     r = radeon_wb_init(rdev);
3899     if (r)
3900         return r;
3901 
3902     r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
3903     if (r) {
3904         dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
3905         return r;
3906     }
3907 
3908     /* Enable IRQ */
3909     if (!rdev->irq.installed) {
3910         r = radeon_irq_kms_init(rdev);
3911         if (r)
3912             return r;
3913     }
3914 
3915     r100_irq_set(rdev);
3916     rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
3917     /* 1M ring buffer */
3918     r = r100_cp_init(rdev, 1024 * 1024);
3919     if (r) {
3920         dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
3921         return r;
3922     }
3923 
3924     r = radeon_ib_pool_init(rdev);
3925     if (r) {
3926         dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
3927         return r;
3928     }
3929 
3930     return 0;
3931 }
3932 
3933 int r100_resume(struct radeon_device *rdev)
3934 {
3935     int r;
3936 
3937     /* Make sur GART are not working */
3938     if (rdev->flags & RADEON_IS_PCI)
3939         r100_pci_gart_disable(rdev);
3940     /* Resume clock before doing reset */
3941     r100_clock_startup(rdev);
3942     /* Reset gpu before posting otherwise ATOM will enter infinite loop */
3943     if (radeon_asic_reset(rdev)) {
3944         dev_warn(rdev->dev, "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
3945             RREG32(R_000E40_RBBM_STATUS),
3946             RREG32(R_0007C0_CP_STAT));
3947     }
3948     /* post */
3949     radeon_combios_asic_init(rdev->ddev);
3950     /* Resume clock after posting */
3951     r100_clock_startup(rdev);
3952     /* Initialize surface registers */
3953     radeon_surface_init(rdev);
3954 
3955     rdev->accel_working = true;
3956     r = r100_startup(rdev);
3957     if (r) {
3958         rdev->accel_working = false;
3959     }
3960     return r;
3961 }
3962 
3963 int r100_suspend(struct radeon_device *rdev)
3964 {
3965     radeon_pm_suspend(rdev);
3966     r100_cp_disable(rdev);
3967     radeon_wb_disable(rdev);
3968     r100_irq_disable(rdev);
3969     if (rdev->flags & RADEON_IS_PCI)
3970         r100_pci_gart_disable(rdev);
3971     return 0;
3972 }
3973 
3974 void r100_fini(struct radeon_device *rdev)
3975 {
3976     radeon_pm_fini(rdev);
3977     r100_cp_fini(rdev);
3978     radeon_wb_fini(rdev);
3979     radeon_ib_pool_fini(rdev);
3980     radeon_gem_fini(rdev);
3981     if (rdev->flags & RADEON_IS_PCI)
3982         r100_pci_gart_fini(rdev);
3983     radeon_agp_fini(rdev);
3984     radeon_irq_kms_fini(rdev);
3985     radeon_fence_driver_fini(rdev);
3986     radeon_bo_fini(rdev);
3987     radeon_atombios_fini(rdev);
3988     kfree(rdev->bios);
3989     rdev->bios = NULL;
3990 }
3991 
3992 /*
3993  * Due to how kexec works, it can leave the hw fully initialised when it
3994  * boots the new kernel. However doing our init sequence with the CP and
3995  * WB stuff setup causes GPU hangs on the RN50 at least. So at startup
3996  * do some quick sanity checks and restore sane values to avoid this
3997  * problem.
3998  */
3999 void r100_restore_sanity(struct radeon_device *rdev)
4000 {
4001     u32 tmp;
4002 
4003     tmp = RREG32(RADEON_CP_CSQ_CNTL);
4004     if (tmp) {
4005         WREG32(RADEON_CP_CSQ_CNTL, 0);
4006     }
4007     tmp = RREG32(RADEON_CP_RB_CNTL);
4008     if (tmp) {
4009         WREG32(RADEON_CP_RB_CNTL, 0);
4010     }
4011     tmp = RREG32(RADEON_SCRATCH_UMSK);
4012     if (tmp) {
4013         WREG32(RADEON_SCRATCH_UMSK, 0);
4014     }
4015 }
4016 
4017 int r100_init(struct radeon_device *rdev)
4018 {
4019     int r;
4020 
4021     /* Register debugfs file specific to this group of asics */
4022     r100_debugfs_mc_info_init(rdev);
4023     /* Disable VGA */
4024     r100_vga_render_disable(rdev);
4025     /* Initialize scratch registers */
4026     radeon_scratch_init(rdev);
4027     /* Initialize surface registers */
4028     radeon_surface_init(rdev);
4029     /* sanity check some register to avoid hangs like after kexec */
4030     r100_restore_sanity(rdev);
4031     /* TODO: disable VGA need to use VGA request */
4032     /* BIOS*/
4033     if (!radeon_get_bios(rdev)) {
4034         if (ASIC_IS_AVIVO(rdev))
4035             return -EINVAL;
4036     }
4037     if (rdev->is_atom_bios) {
4038         dev_err(rdev->dev, "Expecting combios for RS400/RS480 GPU\n");
4039         return -EINVAL;
4040     } else {
4041         r = radeon_combios_init(rdev);
4042         if (r)
4043             return r;
4044     }
4045     /* Reset gpu before posting otherwise ATOM will enter infinite loop */
4046     if (radeon_asic_reset(rdev)) {
4047         dev_warn(rdev->dev,
4048             "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
4049             RREG32(R_000E40_RBBM_STATUS),
4050             RREG32(R_0007C0_CP_STAT));
4051     }
4052     /* check if cards are posted or not */
4053     if (radeon_boot_test_post_card(rdev) == false)
4054         return -EINVAL;
4055     /* Set asic errata */
4056     r100_errata(rdev);
4057     /* Initialize clocks */
4058     radeon_get_clock_info(rdev->ddev);
4059     /* initialize AGP */
4060     if (rdev->flags & RADEON_IS_AGP) {
4061         r = radeon_agp_init(rdev);
4062         if (r) {
4063             radeon_agp_disable(rdev);
4064         }
4065     }
4066     /* initialize VRAM */
4067     r100_mc_init(rdev);
4068     /* Fence driver */
4069     radeon_fence_driver_init(rdev);
4070     /* Memory manager */
4071     r = radeon_bo_init(rdev);
4072     if (r)
4073         return r;
4074     if (rdev->flags & RADEON_IS_PCI) {
4075         r = r100_pci_gart_init(rdev);
4076         if (r)
4077             return r;
4078     }
4079     r100_set_safe_registers(rdev);
4080 
4081     /* Initialize power management */
4082     radeon_pm_init(rdev);
4083 
4084     rdev->accel_working = true;
4085     r = r100_startup(rdev);
4086     if (r) {
4087         /* Somethings want wront with the accel init stop accel */
4088         dev_err(rdev->dev, "Disabling GPU acceleration\n");
4089         r100_cp_fini(rdev);
4090         radeon_wb_fini(rdev);
4091         radeon_ib_pool_fini(rdev);
4092         radeon_irq_kms_fini(rdev);
4093         if (rdev->flags & RADEON_IS_PCI)
4094             r100_pci_gart_fini(rdev);
4095         rdev->accel_working = false;
4096     }
4097     return 0;
4098 }
4099 
4100 uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg)
4101 {
4102     unsigned long flags;
4103     uint32_t ret;
4104 
4105     spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
4106     writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
4107     ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
4108     spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
4109     return ret;
4110 }
4111 
4112 void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg, uint32_t v)
4113 {
4114     unsigned long flags;
4115 
4116     spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
4117     writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
4118     writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
4119     spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
4120 }
4121 
4122 u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
4123 {
4124     if (reg < rdev->rio_mem_size)
4125         return ioread32(rdev->rio_mem + reg);
4126     else {
4127         iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
4128         return ioread32(rdev->rio_mem + RADEON_MM_DATA);
4129     }
4130 }
4131 
4132 void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
4133 {
4134     if (reg < rdev->rio_mem_size)
4135         iowrite32(v, rdev->rio_mem + reg);
4136     else {
4137         iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
4138         iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
4139     }
4140 }