1 /* 2 * Copyright 2009 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric (at) anholt.net> 25 * 26 */ 27 28 #include "brw_context.h" 29 #include "brw_state.h" 30 #include "brw_defines.h" 31 #include "brw_util.h" 32 #include "brw_wm.h" 33 #include "program/program.h" 34 #include "program/prog_parameter.h" 35 #include "program/prog_statevars.h" 36 #include "main/shaderapi.h" 37 #include "main/framebuffer.h" 38 #include "intel_batchbuffer.h" 39 40 static void 41 gen6_upload_wm_push_constants(struct brw_context *brw) 42 { 43 struct brw_stage_state *stage_state = &brw->wm.base; 44 /* BRW_NEW_FRAGMENT_PROGRAM */ 45 const struct brw_program *fp = brw_program_const(brw->fragment_program); 46 /* BRW_NEW_FS_PROG_DATA */ 47 const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data; 48 49 _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT); 50 51 gen6_upload_push_constants(brw, &fp->program, prog_data, 52 stage_state, AUB_TRACE_WM_CONSTANTS); 53 54 if (brw->gen >= 7) { 55 gen7_upload_constant_state(brw, &brw->wm.base, true, 56 _3DSTATE_CONSTANT_PS); 57 } 58 } 59 60 const struct brw_tracked_state gen6_wm_push_constants = { 61 .dirty = { 62 .mesa = _NEW_PROGRAM_CONSTANTS, 63 .brw = BRW_NEW_BATCH | 64 BRW_NEW_BLORP | 65 BRW_NEW_FRAGMENT_PROGRAM | 66 BRW_NEW_FS_PROG_DATA | 67 BRW_NEW_PUSH_CONSTANT_ALLOCATION, 68 }, 69 .emit = gen6_upload_wm_push_constants, 70 }; 71 72 void 73 gen6_upload_wm_state(struct brw_context *brw, 74 const struct brw_wm_prog_data *prog_data, 75 const struct brw_stage_state *stage_state, 76 bool multisampled_fbo, 77 bool dual_source_blend_enable, bool kill_enable, 78 bool color_buffer_write_enable, bool msaa_enabled, 79 bool line_stipple_enable, bool polygon_stipple_enable, 80 bool statistic_enable) 81 { 82 const struct gen_device_info *devinfo = &brw->screen->devinfo; 83 uint32_t dw2, dw4, dw5, dw6, ksp0, ksp2; 84 85 /* We can't fold this into gen6_upload_wm_push_constants(), because 86 * according to the SNB PRM, vol 2 part 1 section 7.2.2 87 * (3DSTATE_CONSTANT_PS [DevSNB]): 88 * 89 * "[DevSNB]: This packet must be followed by WM_STATE." 90 */ 91 if (prog_data->base.nr_params == 0) { 92 /* Disable the push constant buffers. */ 93 BEGIN_BATCH(5); 94 OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2)); 95 OUT_BATCH(0); 96 OUT_BATCH(0); 97 OUT_BATCH(0); 98 OUT_BATCH(0); 99 ADVANCE_BATCH(); 100 } else { 101 BEGIN_BATCH(5); 102 OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | 103 GEN6_CONSTANT_BUFFER_0_ENABLE | 104 (5 - 2)); 105 /* Pointer to the WM constant buffer. Covered by the set of 106 * state flags from gen6_upload_wm_push_constants. 107 */ 108 OUT_BATCH(stage_state->push_const_offset + 109 stage_state->push_const_size - 1); 110 OUT_BATCH(0); 111 OUT_BATCH(0); 112 OUT_BATCH(0); 113 ADVANCE_BATCH(); 114 } 115 116 dw2 = dw4 = dw5 = dw6 = ksp2 = 0; 117 118 if (statistic_enable) 119 dw4 |= GEN6_WM_STATISTICS_ENABLE; 120 121 dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0; 122 dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5; 123 124 if (prog_data->base.use_alt_mode) 125 dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT; 126 127 dw2 |= (ALIGN(stage_state->sampler_count, 4) / 4) << 128 GEN6_WM_SAMPLER_COUNT_SHIFT; 129 130 dw2 |= ((prog_data->base.binding_table.size_bytes / 4) << 131 GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT); 132 133 dw5 |= (devinfo->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT; 134 135 if (prog_data->dispatch_8) 136 dw5 |= GEN6_WM_8_DISPATCH_ENABLE; 137 138 if (prog_data->dispatch_16) 139 dw5 |= GEN6_WM_16_DISPATCH_ENABLE; 140 141 dw4 |= prog_data->base.dispatch_grf_start_reg << 142 GEN6_WM_DISPATCH_START_GRF_SHIFT_0; 143 dw4 |= prog_data->dispatch_grf_start_reg_2 << 144 GEN6_WM_DISPATCH_START_GRF_SHIFT_2; 145 146 ksp0 = stage_state->prog_offset; 147 ksp2 = stage_state->prog_offset + prog_data->prog_offset_2; 148 149 if (dual_source_blend_enable) 150 dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE; 151 152 if (line_stipple_enable) 153 dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE; 154 155 if (polygon_stipple_enable) 156 dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE; 157 158 if (prog_data->uses_src_depth) 159 dw5 |= GEN6_WM_USES_SOURCE_DEPTH; 160 if (prog_data->uses_src_w) 161 dw5 |= GEN6_WM_USES_SOURCE_W; 162 if (prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF) 163 dw5 |= GEN6_WM_COMPUTED_DEPTH; 164 dw6 |= prog_data->barycentric_interp_modes << 165 GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; 166 167 if (kill_enable) 168 dw5 |= GEN6_WM_KILL_ENABLE; 169 170 if (color_buffer_write_enable || 171 dw5 & (GEN6_WM_KILL_ENABLE | GEN6_WM_COMPUTED_DEPTH)) 172 dw5 |= GEN6_WM_DISPATCH_ENABLE; 173 174 /* From the SNB PRM, volume 2 part 1, page 278: 175 * "This bit is inserted in the PS payload header and made available to 176 * the DataPort (either via the message header or via header bypass) to 177 * indicate that oMask data (one or two phases) is included in Render 178 * Target Write messages. If present, the oMask data is used to mask off 179 * samples." 180 */ 181 if (prog_data->uses_omask) 182 dw5 |= GEN6_WM_OMASK_TO_RENDER_TARGET; 183 184 dw6 |= prog_data->num_varying_inputs << 185 GEN6_WM_NUM_SF_OUTPUTS_SHIFT; 186 if (multisampled_fbo) { 187 if (msaa_enabled) 188 dw6 |= GEN6_WM_MSRAST_ON_PATTERN; 189 else 190 dw6 |= GEN6_WM_MSRAST_OFF_PIXEL; 191 192 if (prog_data->persample_dispatch) 193 dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE; 194 else { 195 dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL; 196 } 197 } else { 198 dw6 |= GEN6_WM_MSRAST_OFF_PIXEL; 199 dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE; 200 } 201 202 /* From the SNB PRM, volume 2 part 1, page 281: 203 * "If the PS kernel does not need the Position XY Offsets 204 * to compute a Position XY value, then this field should be 205 * programmed to POSOFFSET_NONE." 206 * 207 * "SW Recommendation: If the PS kernel needs the Position Offsets 208 * to compute a Position XY value, this field should match Position 209 * ZW Interpolation Mode to ensure a consistent position.xyzw 210 * computation." 211 * We only require XY sample offsets. So, this recommendation doesn't 212 * look useful at the moment. We might need this in future. 213 */ 214 if (prog_data->uses_pos_offset) 215 dw6 |= GEN6_WM_POSOFFSET_SAMPLE; 216 else 217 dw6 |= GEN6_WM_POSOFFSET_NONE; 218 219 BEGIN_BATCH(9); 220 OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); 221 OUT_BATCH(ksp0); 222 OUT_BATCH(dw2); 223 if (prog_data->base.total_scratch) { 224 OUT_RELOC(stage_state->scratch_bo, 225 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 226 ffs(stage_state->per_thread_scratch) - 11); 227 } else { 228 OUT_BATCH(0); 229 } 230 OUT_BATCH(dw4); 231 OUT_BATCH(dw5); 232 OUT_BATCH(dw6); 233 OUT_BATCH(0); /* kernel 1 pointer */ 234 OUT_BATCH(ksp2); 235 ADVANCE_BATCH(); 236 } 237 238 static void 239 upload_wm_state(struct brw_context *brw) 240 { 241 struct gl_context *ctx = &brw->ctx; 242 /* BRW_NEW_FS_PROG_DATA */ 243 const struct brw_wm_prog_data *prog_data = 244 brw_wm_prog_data(brw->wm.base.prog_data); 245 246 /* _NEW_BUFFERS */ 247 const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; 248 249 /* BRW_NEW_FS_PROG_DATA | _NEW_COLOR */ 250 const bool dual_src_blend_enable = prog_data->dual_src_blend && 251 (ctx->Color.BlendEnabled & 1) && 252 ctx->Color.Blend[0]._UsesDualSrc; 253 254 /* _NEW_COLOR, _NEW_MULTISAMPLE _NEW_BUFFERS */ 255 const bool kill_enable = prog_data->uses_kill || 256 _mesa_is_alpha_test_enabled(ctx) || 257 _mesa_is_alpha_to_coverage_enabled(ctx) || 258 prog_data->uses_omask; 259 260 /* Rendering against the gl-context is always taken into account. */ 261 const bool statistic_enable = true; 262 263 /* _NEW_LINE | _NEW_POLYGON | _NEW_BUFFERS | _NEW_COLOR | 264 * _NEW_MULTISAMPLE 265 */ 266 gen6_upload_wm_state(brw, prog_data, &brw->wm.base, 267 multisampled_fbo, 268 dual_src_blend_enable, kill_enable, 269 brw_color_buffer_write_enabled(brw), 270 ctx->Multisample.Enabled, 271 ctx->Line.StippleFlag, ctx->Polygon.StippleFlag, 272 statistic_enable); 273 } 274 275 const struct brw_tracked_state gen6_wm_state = { 276 .dirty = { 277 .mesa = _NEW_BUFFERS | 278 _NEW_COLOR | 279 _NEW_LINE | 280 _NEW_MULTISAMPLE | 281 _NEW_POLYGON | 282 _NEW_PROGRAM_CONSTANTS, 283 .brw = BRW_NEW_BATCH | 284 BRW_NEW_BLORP | 285 BRW_NEW_FS_PROG_DATA | 286 BRW_NEW_PUSH_CONSTANT_ALLOCATION, 287 }, 288 .emit = upload_wm_state, 289 }; 290