1 /* 2 * Mesa 3-D graphics library 3 * 4 * Copyright (C) 2014 LunarG, Inc. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included 14 * in all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: 25 * Chia-I Wu <olv (at) lunarg.com> 26 */ 27 28 #include "genhw/genhw.h" 29 #include "core/ilo_builder_media.h" 30 #include "core/ilo_builder_mi.h" 31 #include "core/ilo_builder_render.h" 32 33 #include "ilo_shader.h" 34 #include "ilo_state.h" 35 #include "ilo_render_gen.h" 36 37 struct gen7_l3_config { 38 int slm; 39 int urb; 40 int rest; 41 int dc; 42 int ro; 43 int is; 44 int c; 45 int t; 46 }; 47 48 /* 49 * From the Ivy Bridge PRM, volume 1 part 7, page 10: 50 * 51 * "Normal L3/URB mode (non-SLM mode), uses all 4 banks of L3 equally to 52 * distribute cycles. The following allocation is a suggested programming 53 * model. Note all numbers below are given in KBytes." 54 * 55 * From the Haswell PRM, volume 7, page 662: 56 * 57 * "The configuration for {SLM = 0,URB = 224,DC = 32,RO = 256,IS = 0,C = 58 * 0,T =0, SUM 512} was validated as a later supported configuration and 59 * can be utilized if desired." 60 */ 61 static const struct gen7_l3_config gen7_l3_non_slm_configs[] = { 62 /* SLM URB Rest DC RO I/S C T */ 63 [0] = { 0, 256, 0, 0, 256, 0, 0, 0, }, 64 [1] = { 0, 256, 0, 128, 128, 0, 0, 0, }, 65 [2] = { 0, 256, 0, 32, 0, 64, 32, 128, }, 66 [3] = { 0, 224, 0, 64, 0, 64, 32, 128, }, 67 [4] = { 0, 224, 0, 128, 0, 64, 32, 64, }, 68 [5] = { 0, 224, 0, 64, 0, 128, 32, 64, }, 69 [6] = { 0, 224, 0, 0, 0, 128, 32, 128, }, 70 [7] = { 0, 256, 0, 0, 0, 128, 0, 128, }, 71 72 [8] = { 0, 224, 0, 32, 256, 0, 0, 0, }, 73 }; 74 75 /* 76 * From the Ivy Bridge PRM, volume 1 part 7, page 11: 77 * 78 * "With the existence of Shared Local Memory, a 64KB chunk from each of 79 * the 2 L3 banks will be reserved for SLM usage. The remaining cache 80 * space is divided between the remaining clients. SLM allocation is done 81 * via reducing the number of ways on the two banks from 64 to 32." 82 * 83 * From the Haswell PRM, volume 7, page 662: 84 * 85 * "The configuration for {SLM = 128,URB = 128,DC = 0,RO = 256,IS = 0,C = 86 * 0,T =0, SUM 512} was validated as a later supported configuration and 87 * can be utilized if desired. For this configuration, global atomics 88 * must be programmed to be in GTI." 89 */ 90 static const struct gen7_l3_config gen7_l3_slm_configs[] = { 91 /* SLM URB Rest DC RO I/S C T */ 92 [0] = { 128, 128, 0, 128, 128, 0, 0, 0, }, 93 [1] = { 128, 128, 0, 64, 0, 64, 64, 64, }, 94 [2] = { 128, 128, 0, 32, 0, 64, 32, 128, }, 95 [3] = { 128, 128, 0, 32, 0, 128, 32, 64, }, 96 97 [4] = { 128, 128, 0, 0, 256, 0, 0, 0, }, 98 }; 99 100 static void 101 gen7_launch_grid_l3(struct ilo_render *r, bool use_slm) 102 { 103 uint32_t l3sqcreg1, l3cntlreg2, l3cntlreg3; 104 const struct gen7_l3_config *conf; 105 106 /* 107 * This function mostly follows what beignet does. I do not know why, for 108 * example, CON4DCUNC should be reset. I do not know if it should be set 109 * again after launch_grid(). 110 */ 111 112 ILO_DEV_ASSERT(r->dev, 7, 7.5); 113 114 if (use_slm) 115 conf = &gen7_l3_slm_configs[1]; 116 else 117 conf = &gen7_l3_non_slm_configs[4]; 118 119 /* unset GEN7_REG_L3SQCREG1_CON4DCUNC (without readback first) */ 120 if (ilo_dev_gen(r->dev) >= ILO_GEN(7.5)) { 121 l3sqcreg1 = GEN75_REG_L3SQCREG1_SQGPCI_24 | 122 GEN75_REG_L3SQCREG1_SQHPCI_8; 123 } else { 124 l3sqcreg1 = GEN7_REG_L3SQCREG1_SQGHPCI_18_6; 125 } 126 127 l3cntlreg2 = (conf->dc / 8) << GEN7_REG_L3CNTLREG2_DCWASS__SHIFT | 128 (conf->ro / 8) << GEN7_REG_L3CNTLREG2_RDOCPL__SHIFT | 129 (conf->urb / 8) << GEN7_REG_L3CNTLREG2_URBALL__SHIFT; 130 131 l3cntlreg3 = (conf->t / 8) << GEN7_REG_L3CNTLREG3_TXWYALL__SHIFT | 132 (conf->c / 8) << GEN7_REG_L3CNTLREG3_CTWYALL__SHIFT | 133 (conf->is / 8) << GEN7_REG_L3CNTLREG3_ISWYALL__SHIFT; 134 135 if (conf->slm) { 136 /* 137 * From the Ivy Bridge PRM, volume 1 part 7, page 11: 138 * 139 * "Note that URB needs to be set as low b/w client in SLM mode, 140 * else the hash will fail. This is a required s/w model." 141 */ 142 l3cntlreg2 |= GEN7_REG_L3CNTLREG2_URBSLMB | 143 GEN7_REG_L3CNTLREG2_SLMMENB; 144 } 145 146 gen6_MI_LOAD_REGISTER_IMM(r->builder, GEN7_REG_L3SQCREG1, l3sqcreg1); 147 gen6_MI_LOAD_REGISTER_IMM(r->builder, GEN7_REG_L3CNTLREG2, l3cntlreg2); 148 gen6_MI_LOAD_REGISTER_IMM(r->builder, GEN7_REG_L3CNTLREG3, l3cntlreg3); 149 } 150 151 int 152 ilo_render_get_launch_grid_commands_len(const struct ilo_render *render, 153 const struct ilo_state_vector *vec) 154 { 155 static int len; 156 157 ILO_DEV_ASSERT(render->dev, 7, 7.5); 158 159 if (!len) { 160 len += 161 GEN6_PIPELINE_SELECT__SIZE + 162 GEN6_STATE_BASE_ADDRESS__SIZE + 163 GEN6_MEDIA_VFE_STATE__SIZE + 164 GEN6_MEDIA_CURBE_LOAD__SIZE + 165 GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD__SIZE + 166 GEN6_MEDIA_STATE_FLUSH__SIZE; 167 168 len += ilo_render_get_flush_len(render) * 3; 169 170 if (ilo_dev_gen(render->dev) >= ILO_GEN(7)) { 171 len += GEN6_MI_LOAD_REGISTER_IMM__SIZE * 3 * 2; 172 len += GEN7_GPGPU_WALKER__SIZE; 173 } 174 } 175 176 return len; 177 } 178 179 void 180 ilo_render_emit_launch_grid_commands(struct ilo_render *render, 181 const struct ilo_state_vector *vec, 182 const struct ilo_render_launch_grid_session *session) 183 { 184 const unsigned batch_used = ilo_builder_batch_used(render->builder); 185 const uint32_t pcb = render->state.cs.PUSH_CONSTANT_BUFFER; 186 const int pcb_size = render->state.cs.PUSH_CONSTANT_BUFFER_size; 187 int simd_size; 188 bool use_slm; 189 190 ILO_DEV_ASSERT(render->dev, 7, 7.5); 191 192 simd_size = ilo_shader_get_kernel_param(vec->cs, ILO_KERNEL_CS_SIMD_SIZE); 193 use_slm = ilo_shader_get_kernel_param(vec->cs, ILO_KERNEL_CS_LOCAL_SIZE); 194 195 ilo_render_emit_flush(render); 196 197 if (ilo_dev_gen(render->dev) >= ILO_GEN(7)) { 198 gen7_launch_grid_l3(render, use_slm); 199 ilo_render_emit_flush(render); 200 201 gen6_PIPELINE_SELECT(render->builder, 202 GEN7_PIPELINE_SELECT_DW0_SELECT_GPGPU); 203 } else { 204 gen6_PIPELINE_SELECT(render->builder, 205 GEN6_PIPELINE_SELECT_DW0_SELECT_MEDIA); 206 } 207 208 gen6_state_base_address(render->builder, true); 209 210 gen6_MEDIA_VFE_STATE(render->builder, &session->compute); 211 212 if (pcb_size) 213 gen6_MEDIA_CURBE_LOAD(render->builder, pcb, pcb_size); 214 215 gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD(render->builder, 216 session->idrt, session->idrt_size); 217 218 gen7_GPGPU_WALKER(render->builder, session->thread_group_offset, 219 session->thread_group_dim, session->thread_group_size, simd_size); 220 221 gen6_MEDIA_STATE_FLUSH(render->builder); 222 223 if (ilo_dev_gen(render->dev) >= ILO_GEN(7) && use_slm) { 224 ilo_render_emit_flush(render); 225 gen7_launch_grid_l3(render, false); 226 } 227 228 assert(ilo_builder_batch_used(render->builder) <= batch_used + 229 ilo_render_get_launch_grid_commands_len(render, vec)); 230 } 231