Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: MIT
0002 /*
0003  * Copyright 2022 Advanced Micro Devices, Inc.
0004  *
0005  * Permission is hereby granted, free of charge, to any person obtaining a
0006  * copy of this software and associated documentation files (the "Software"),
0007  * to deal in the Software without restriction, including without limitation
0008  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
0009  * and/or sell copies of the Software, and to permit persons to whom the
0010  * Software is furnished to do so, subject to the following conditions:
0011  *
0012  * The above copyright notice and this permission notice shall be included in
0013  * all copies or substantial portions of the Software.
0014  *
0015  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0016  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0017  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
0018  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
0019  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
0020  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
0021  * OTHER DEALINGS IN THE SOFTWARE.
0022  *
0023  * Authors: AMD
0024  *
0025  */
0026 
0027 #include "clk_mgr.h"
0028 #include "resource.h"
0029 #include "dcn31/dcn31_hubbub.h"
0030 #include "dcn314_fpu.h"
0031 #include "dml/dcn20/dcn20_fpu.h"
0032 #include "dml/display_mode_vba.h"
0033 
0034 struct _vcs_dpi_ip_params_st dcn3_14_ip = {
0035     .VBlankNomDefaultUS = 668,
0036     .gpuvm_enable = 1,
0037     .gpuvm_max_page_table_levels = 1,
0038     .hostvm_enable = 1,
0039     .hostvm_max_page_table_levels = 2,
0040     .rob_buffer_size_kbytes = 64,
0041     .det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE,
0042     .config_return_buffer_size_in_kbytes = 1792,
0043     .compressed_buffer_segment_size_in_kbytes = 64,
0044     .meta_fifo_size_in_kentries = 32,
0045     .zero_size_buffer_entries = 512,
0046     .compbuf_reserved_space_64b = 256,
0047     .compbuf_reserved_space_zs = 64,
0048     .dpp_output_buffer_pixels = 2560,
0049     .opp_output_buffer_lines = 1,
0050     .pixel_chunk_size_kbytes = 8,
0051     .meta_chunk_size_kbytes = 2,
0052     .min_meta_chunk_size_bytes = 256,
0053     .writeback_chunk_size_kbytes = 8,
0054     .ptoi_supported = false,
0055     .num_dsc = 4,
0056     .maximum_dsc_bits_per_component = 10,
0057     .dsc422_native_support = false,
0058     .is_line_buffer_bpp_fixed = true,
0059     .line_buffer_fixed_bpp = 48,
0060     .line_buffer_size_bits = 789504,
0061     .max_line_buffer_lines = 12,
0062     .writeback_interface_buffer_size_kbytes = 90,
0063     .max_num_dpp = 4,
0064     .max_num_otg = 4,
0065     .max_num_hdmi_frl_outputs = 1,
0066     .max_num_wb = 1,
0067     .max_dchub_pscl_bw_pix_per_clk = 4,
0068     .max_pscl_lb_bw_pix_per_clk = 2,
0069     .max_lb_vscl_bw_pix_per_clk = 4,
0070     .max_vscl_hscl_bw_pix_per_clk = 4,
0071     .max_hscl_ratio = 6,
0072     .max_vscl_ratio = 6,
0073     .max_hscl_taps = 8,
0074     .max_vscl_taps = 8,
0075     .dpte_buffer_size_in_pte_reqs_luma = 64,
0076     .dpte_buffer_size_in_pte_reqs_chroma = 34,
0077     .dispclk_ramp_margin_percent = 1,
0078     .max_inter_dcn_tile_repeaters = 8,
0079     .cursor_buffer_size = 16,
0080     .cursor_chunk_size = 2,
0081     .writeback_line_buffer_buffer_size = 0,
0082     .writeback_min_hscl_ratio = 1,
0083     .writeback_min_vscl_ratio = 1,
0084     .writeback_max_hscl_ratio = 1,
0085     .writeback_max_vscl_ratio = 1,
0086     .writeback_max_hscl_taps = 1,
0087     .writeback_max_vscl_taps = 1,
0088     .dppclk_delay_subtotal = 46,
0089     .dppclk_delay_scl = 50,
0090     .dppclk_delay_scl_lb_only = 16,
0091     .dppclk_delay_cnvc_formatter = 27,
0092     .dppclk_delay_cnvc_cursor = 6,
0093     .dispclk_delay_subtotal = 119,
0094     .dynamic_metadata_vm_enabled = false,
0095     .odm_combine_4to1_supported = false,
0096     .dcc_supported = true,
0097 };
0098 
0099 struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = {
0100         /*TODO: correct dispclk/dppclk voltage level determination*/
0101     .clock_limits = {
0102         {
0103             .state = 0,
0104             .dispclk_mhz = 1200.0,
0105             .dppclk_mhz = 1200.0,
0106             .phyclk_mhz = 600.0,
0107             .phyclk_d18_mhz = 667.0,
0108             .dscclk_mhz = 186.0,
0109             .dtbclk_mhz = 600.0,
0110         },
0111         {
0112             .state = 1,
0113             .dispclk_mhz = 1200.0,
0114             .dppclk_mhz = 1200.0,
0115             .phyclk_mhz = 810.0,
0116             .phyclk_d18_mhz = 667.0,
0117             .dscclk_mhz = 209.0,
0118             .dtbclk_mhz = 600.0,
0119         },
0120         {
0121             .state = 2,
0122             .dispclk_mhz = 1200.0,
0123             .dppclk_mhz = 1200.0,
0124             .phyclk_mhz = 810.0,
0125             .phyclk_d18_mhz = 667.0,
0126             .dscclk_mhz = 209.0,
0127             .dtbclk_mhz = 600.0,
0128         },
0129         {
0130             .state = 3,
0131             .dispclk_mhz = 1200.0,
0132             .dppclk_mhz = 1200.0,
0133             .phyclk_mhz = 810.0,
0134             .phyclk_d18_mhz = 667.0,
0135             .dscclk_mhz = 371.0,
0136             .dtbclk_mhz = 600.0,
0137         },
0138         {
0139             .state = 4,
0140             .dispclk_mhz = 1200.0,
0141             .dppclk_mhz = 1200.0,
0142             .phyclk_mhz = 810.0,
0143             .phyclk_d18_mhz = 667.0,
0144             .dscclk_mhz = 417.0,
0145             .dtbclk_mhz = 600.0,
0146         },
0147     },
0148     .num_states = 5,
0149     .sr_exit_time_us = 9.0,
0150     .sr_enter_plus_exit_time_us = 11.0,
0151     .sr_exit_z8_time_us = 442.0,
0152     .sr_enter_plus_exit_z8_time_us = 560.0,
0153     .writeback_latency_us = 12.0,
0154     .dram_channel_width_bytes = 4,
0155     .round_trip_ping_latency_dcfclk_cycles = 106,
0156     .urgent_latency_pixel_data_only_us = 4.0,
0157     .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
0158     .urgent_latency_vm_data_only_us = 4.0,
0159     .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
0160     .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
0161     .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
0162     .pct_ideal_sdp_bw_after_urgent = 80.0,
0163     .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
0164     .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
0165     .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
0166     .max_avg_sdp_bw_use_normal_percent = 60.0,
0167     .max_avg_dram_bw_use_normal_percent = 60.0,
0168     .fabric_datapath_to_dcn_data_return_bytes = 32,
0169     .return_bus_width_bytes = 64,
0170     .downspread_percent = 0.38,
0171     .dcn_downspread_percent = 0.5,
0172     .gpuvm_min_page_size_bytes = 4096,
0173     .hostvm_min_page_size_bytes = 4096,
0174     .do_urgent_latency_adjustment = false,
0175     .urgent_latency_adjustment_fabric_clock_component_us = 0,
0176     .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
0177 };
0178 
0179 
0180 void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params)
0181 {
0182     struct clk_limit_table *clk_table = &bw_params->clk_table;
0183     struct _vcs_dpi_voltage_scaling_st *clock_limits =
0184         dcn3_14_soc.clock_limits;
0185     unsigned int i, closest_clk_lvl;
0186     int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
0187     int j;
0188 
0189     dc_assert_fp_enabled();
0190 
0191     // Default clock levels are used for diags, which may lead to overclocking.
0192     if (!IS_DIAG_DC(dc->ctx->dce_environment) && dc->config.use_default_clock_table == false) {
0193 
0194         dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
0195         dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count;
0196 
0197         if (bw_params->dram_channel_width_bytes > 0)
0198             dcn3_14_soc.dram_channel_width_bytes = bw_params->dram_channel_width_bytes;
0199 
0200         if (bw_params->num_channels > 0)
0201             dcn3_14_soc.num_chans = bw_params->num_channels;
0202 
0203         ASSERT(dcn3_14_soc.num_chans);
0204         ASSERT(clk_table->num_entries);
0205 
0206         /* Prepass to find max clocks independent of voltage level. */
0207         for (i = 0; i < clk_table->num_entries; ++i) {
0208             if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
0209                 max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
0210             if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
0211                 max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
0212         }
0213 
0214         for (i = 0; i < clk_table->num_entries; i++) {
0215             /* loop backwards*/
0216             for (closest_clk_lvl = 0, j = dcn3_14_soc.num_states - 1; j >= 0; j--) {
0217                 if ((unsigned int) dcn3_14_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
0218                     closest_clk_lvl = j;
0219                     break;
0220                 }
0221             }
0222             if (clk_table->num_entries == 1) {
0223                 /*smu gives one DPM level, let's take the highest one*/
0224                 closest_clk_lvl = dcn3_14_soc.num_states - 1;
0225             }
0226 
0227             clock_limits[i].state = i;
0228 
0229             /* Clocks dependent on voltage level. */
0230             clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
0231             if (clk_table->num_entries == 1 &&
0232                 clock_limits[i].dcfclk_mhz < dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
0233                 /*SMU fix not released yet*/
0234                 clock_limits[i].dcfclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
0235             }
0236             clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
0237             clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
0238 
0239             if (clk_table->entries[i].memclk_mhz && clk_table->entries[i].wck_ratio)
0240                 clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
0241 
0242             /* Clocks independent of voltage level. */
0243             clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
0244                 dcn3_14_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
0245 
0246             clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
0247                 dcn3_14_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
0248 
0249             clock_limits[i].dram_bw_per_chan_gbps = dcn3_14_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
0250             clock_limits[i].dscclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
0251             clock_limits[i].dtbclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
0252             clock_limits[i].phyclk_d18_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
0253             clock_limits[i].phyclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
0254         }
0255         for (i = 0; i < clk_table->num_entries; i++)
0256             dcn3_14_soc.clock_limits[i] = clock_limits[i];
0257         if (clk_table->num_entries) {
0258             dcn3_14_soc.num_states = clk_table->num_entries;
0259         }
0260     }
0261 
0262     if (max_dispclk_mhz) {
0263         dcn3_14_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
0264         dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
0265     }
0266 
0267     if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
0268         dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN314);
0269     else
0270         dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA);
0271 }
0272 
0273 static bool is_dual_plane(enum surface_pixel_format format)
0274 {
0275     return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
0276 }
0277 
0278 int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context,
0279                            display_e2e_pipe_params_st *pipes,
0280                            bool fast_validate)
0281 {
0282     int i, pipe_cnt;
0283     struct resource_context *res_ctx = &context->res_ctx;
0284     struct pipe_ctx *pipe;
0285     bool upscaled = false;
0286 
0287     dc_assert_fp_enabled();
0288 
0289     dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
0290 
0291     for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
0292         struct dc_crtc_timing *timing;
0293 
0294         if (!res_ctx->pipe_ctx[i].stream)
0295             continue;
0296         pipe = &res_ctx->pipe_ctx[i];
0297         timing = &pipe->stream->timing;
0298 
0299         if (dc_extended_blank_supported(dc) && pipe->stream->adjust.v_total_max == pipe->stream->adjust.v_total_min
0300             && pipe->stream->adjust.v_total_min > timing->v_total)
0301             pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min;
0302 
0303         if (pipe->plane_state &&
0304                 (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height ||
0305                 pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width))
0306             upscaled = true;
0307 
0308         /*
0309          * Immediate flip can be set dynamically after enabling the plane.
0310          * We need to require support for immediate flip or underflow can be
0311          * intermittently experienced depending on peak b/w requirements.
0312          */
0313         pipes[pipe_cnt].pipe.src.immediate_flip = true;
0314 
0315         pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
0316         pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active;
0317         pipes[pipe_cnt].pipe.src.gpuvm = true;
0318         pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
0319         pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
0320         pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
0321         pipes[pipe_cnt].pipe.src.dcc_rate = 3;
0322         pipes[pipe_cnt].dout.dsc_input_bpc = 0;
0323 
0324         if (pipes[pipe_cnt].dout.dsc_enable) {
0325             switch (timing->display_color_depth) {
0326             case COLOR_DEPTH_888:
0327                 pipes[pipe_cnt].dout.dsc_input_bpc = 8;
0328                 break;
0329             case COLOR_DEPTH_101010:
0330                 pipes[pipe_cnt].dout.dsc_input_bpc = 10;
0331                 break;
0332             case COLOR_DEPTH_121212:
0333                 pipes[pipe_cnt].dout.dsc_input_bpc = 12;
0334                 break;
0335             default:
0336                 ASSERT(0);
0337                 break;
0338             }
0339         }
0340 
0341         pipe_cnt++;
0342     }
0343     context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE;
0344 
0345     dc->config.enable_4to1MPC = false;
0346     if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) {
0347         if (is_dual_plane(pipe->plane_state->format)
0348                 && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) {
0349             dc->config.enable_4to1MPC = true;
0350         } else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) {
0351             /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */
0352             context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
0353             pipes[0].pipe.src.unbounded_req_mode = true;
0354         }
0355     } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count
0356             && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) {
0357         context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64;
0358     } else if (context->stream_count >= 3 && upscaled) {
0359         context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
0360     }
0361 
0362     for (i = 0; i < dc->res_pool->pipe_count; i++) {
0363         struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
0364 
0365         if (!pipe->stream)
0366             continue;
0367 
0368         if (pipe->stream->signal == SIGNAL_TYPE_EDP && dc->debug.seamless_boot_odm_combine &&
0369                 pipe->stream->apply_seamless_boot_optimization) {
0370 
0371             if (pipe->stream->apply_boot_odm_mode == dm_odm_combine_policy_2to1) {
0372                 context->bw_ctx.dml.vba.ODMCombinePolicy = dm_odm_combine_policy_2to1;
0373                 break;
0374             }
0375         }
0376     }
0377 
0378     return pipe_cnt;
0379 }