1 /* 2 * Copyright (c) 2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #include <stdlib.h> 25 #include <math.h> 26 27 #include "util/macros.h" 28 #include "main/macros.h" 29 30 #include "gen_l3_config.h" 31 32 /** 33 * The following diagram shows how we partition the URB: 34 * 35 * 16kb or 32kb Rest of the URB space 36 * __________-__________ _________________-_________________ 37 * / \ / \ 38 * +-------------------------------------------------------------+ 39 * | VS/HS/DS/GS/FS Push | VS/HS/DS/GS URB | 40 * | Constants | Entries | 41 * +-------------------------------------------------------------+ 42 * 43 * Push constants must be stored at the beginning of the URB space, 44 * while URB entries can be stored anywhere. We choose to lay them 45 * out in pipeline order (VS -> HS -> DS -> GS). 46 */ 47 48 /** 49 * Decide how to partition the URB among the various stages. 50 * 51 * \param[in] push_constant_bytes - space allocate for push constants. 52 * \param[in] urb_size_bytes - total size of the URB (from L3 config). 53 * \param[in] tess_present - are tessellation shaders active? 54 * \param[in] gs_present - are geometry shaders active? 55 * \param[in] entry_size - the URB entry size (from the shader compiler) 56 * \param[out] entries - the number of URB entries for each stage 57 * \param[out] start - the starting offset for each stage 58 */ 59 void 60 gen_get_urb_config(const struct gen_device_info *devinfo, 61 unsigned push_constant_bytes, unsigned urb_size_bytes, 62 bool tess_present, bool gs_present, 63 const unsigned entry_size[4], 64 unsigned entries[4], unsigned start[4]) 65 { 66 const bool active[4] = { true, tess_present, tess_present, gs_present }; 67 68 /* URB allocations must be done in 8k chunks. */ 69 const unsigned chunk_size_bytes = 8192; 70 71 const unsigned push_constant_chunks = 72 push_constant_bytes / chunk_size_bytes; 73 const unsigned urb_chunks = urb_size_bytes / chunk_size_bytes; 74 75 /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): 76 * 77 * VS Number of URB Entries must be divisible by 8 if the VS URB Entry 78 * Allocation Size is less than 9 512-bit URB entries. 79 * 80 * Similar text exists for HS, DS and GS. 81 */ 82 unsigned granularity[4]; 83 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 84 granularity[i] = (entry_size[i] < 9) ? 8 : 1; 85 } 86 87 unsigned min_entries[4] = { 88 /* VS has a lower limit on the number of URB entries. 89 * 90 * From the Broadwell PRM, 3DSTATE_URB_VS instruction: 91 * "When tessellation is enabled, the VS Number of URB Entries must be 92 * greater than or equal to 192." 93 */ 94 [MESA_SHADER_VERTEX] = tess_present && devinfo->gen == 8 ? 95 192 : devinfo->urb.min_entries[MESA_SHADER_VERTEX], 96 97 /* There are two constraints on the minimum amount of URB space we can 98 * allocate: 99 * 100 * (1) We need room for at least 2 URB entries, since we always operate 101 * the GS in DUAL_OBJECT mode. 102 * 103 * (2) We can't allocate less than nr_gs_entries_granularity. 104 */ 105 [MESA_SHADER_GEOMETRY] = gs_present ? 2 : 0, 106 107 [MESA_SHADER_TESS_CTRL] = tess_present ? 1 : 0, 108 109 [MESA_SHADER_TESS_EVAL] = tess_present ? 110 devinfo->urb.min_entries[MESA_SHADER_TESS_EVAL] : 0, 111 }; 112 113 /* Min VS Entries isn't a multiple of 8 on Cherryview/Broxton; round up. 114 * Round them all up. 115 */ 116 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 117 min_entries[i] = ALIGN(min_entries[i], granularity[i]); 118 } 119 120 unsigned entry_size_bytes[4]; 121 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 122 entry_size_bytes[i] = 64 * entry_size[i]; 123 } 124 125 /* Initially, assign each stage the minimum amount of URB space it needs, 126 * and make a note of how much additional space it "wants" (the amount of 127 * additional space it could actually make use of). 128 */ 129 unsigned chunks[4]; 130 unsigned wants[4]; 131 unsigned total_needs = push_constant_chunks; 132 unsigned total_wants = 0; 133 134 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 135 if (active[i]) { 136 chunks[i] = DIV_ROUND_UP(min_entries[i] * entry_size_bytes[i], 137 chunk_size_bytes); 138 139 wants[i] = 140 DIV_ROUND_UP(devinfo->urb.max_entries[i] * entry_size_bytes[i], 141 chunk_size_bytes) - chunks[i]; 142 } else { 143 chunks[i] = 0; 144 wants[i] = 0; 145 } 146 147 total_needs += chunks[i]; 148 total_wants += wants[i]; 149 } 150 151 assert(total_needs <= urb_chunks); 152 153 /* Mete out remaining space (if any) in proportion to "wants". */ 154 unsigned remaining_space = MIN2(urb_chunks - total_needs, total_wants); 155 156 if (remaining_space > 0) { 157 for (int i = MESA_SHADER_VERTEX; 158 total_wants > 0 && i <= MESA_SHADER_TESS_EVAL; i++) { 159 unsigned additional = (unsigned) 160 roundf(wants[i] * (((float) remaining_space) / total_wants)); 161 chunks[i] += additional; 162 remaining_space -= additional; 163 total_wants -= wants[i]; 164 } 165 166 chunks[MESA_SHADER_GEOMETRY] += remaining_space; 167 } 168 169 /* Sanity check that we haven't over-allocated. */ 170 unsigned total_chunks = push_constant_chunks; 171 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 172 total_chunks += chunks[i]; 173 } 174 assert(total_chunks <= urb_chunks); 175 176 /* Finally, compute the number of entries that can fit in the space 177 * allocated to each stage. 178 */ 179 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 180 entries[i] = chunks[i] * chunk_size_bytes / entry_size_bytes[i]; 181 182 /* Since we rounded up when computing wants[], this may be slightly 183 * more than the maximum allowed amount, so correct for that. 184 */ 185 entries[i] = MIN2(entries[i], devinfo->urb.max_entries[i]); 186 187 /* Ensure that we program a multiple of the granularity. */ 188 entries[i] = ROUND_DOWN_TO(entries[i], granularity[i]); 189 190 /* Finally, sanity check to make sure we have at least the minimum 191 * number of entries needed for each stage. 192 */ 193 assert(entries[i] >= min_entries[i]); 194 } 195 196 /* Lay out the URB in pipeline order: push constants, VS, HS, DS, GS. */ 197 start[0] = push_constant_chunks; 198 for (int i = MESA_SHADER_TESS_CTRL; i <= MESA_SHADER_GEOMETRY; i++) { 199 start[i] = start[i - 1] + chunks[i - 1]; 200 } 201 } 202