1 /* 2 * Mesa 3-D graphics library 3 * 4 * Copyright (C) 2012-2015 LunarG, Inc. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included 14 * in all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: 25 * Chia-I Wu <olv (at) lunarg.com> 26 */ 27 28 #include "ilo_debug.h" 29 #include "ilo_vma.h" 30 #include "ilo_state_sol.h" 31 32 static bool 33 sol_stream_validate_gen7(const struct ilo_dev *dev, 34 const struct ilo_state_sol_stream_info *stream) 35 { 36 uint8_t i; 37 38 ILO_DEV_ASSERT(dev, 7, 8); 39 40 assert(stream->vue_read_base + stream->vue_read_count <= 41 stream->cv_vue_attr_count); 42 43 /* 44 * From the Ivy Bridge PRM, volume 2 part 1, page 200: 45 * 46 * "(Stream 0 Vertex Read Offset) 47 * Format: U1 count of 256-bit units 48 * 49 * Specifies amount of data to skip over before reading back Stream 0 50 * vertex data. Must be zero if the GS is enabled and the Output 51 * Vertex Size field in 3DSTATE_GS is programmed to 0 (i.e., one 16B 52 * unit)." 53 * 54 * "(Stream 0 Vertex Read Length) 55 * Format: U5-1 count of 256-bit units 56 * 57 * Specifies amount of vertex data to read back for Stream 0 vertices, 58 * starting at the Stream 0 Vertex Read Offset location. Maximum 59 * readback is 17 256-bit units (34 128-bit vertex attributes). Read 60 * data past the end of the valid vertex data has undefined contents, 61 * and therefore shouldn't be used to source stream out data. Must be 62 * zero (i.e., read length = 256b) if the GS is enabled and the Output 63 * Vertex Size field in 3DSTATE_GS is programmed to 0 (i.e., one 16B 64 * unit)." 65 */ 66 assert(stream->vue_read_base == 0 || stream->vue_read_base == 2); 67 assert(stream->vue_read_count <= 34); 68 69 assert(stream->decl_count <= ILO_STATE_SOL_MAX_DECL_COUNT); 70 71 for (i = 0; i < stream->decl_count; i++) { 72 const struct ilo_state_sol_decl_info *decl = &stream->decls[i]; 73 74 assert(decl->is_hole || decl->attr < stream->vue_read_count); 75 76 /* 77 * From the Ivy Bridge PRM, volume 2 part 1, page 205: 78 * 79 * "There is only enough internal storage for the 128-bit vertex 80 * header and 32 128-bit vertex attributes." 81 */ 82 assert(decl->attr < 33); 83 84 assert(decl->component_base < 4 && 85 decl->component_base + decl->component_count <= 4); 86 assert(decl->buffer < ILO_STATE_SOL_MAX_BUFFER_COUNT); 87 } 88 89 return true; 90 } 91 92 static bool 93 sol_validate_gen7(const struct ilo_dev *dev, 94 const struct ilo_state_sol_info *info) 95 { 96 uint8_t i; 97 98 ILO_DEV_ASSERT(dev, 7, 8); 99 100 /* 101 * From the Ivy Bridge PRM, volume 2 part 1, page 198: 102 * 103 * "This bit (Render Stream Select) is used even if SO Function Enable 104 * is DISABLED." 105 * 106 * From the Haswell PRM, volume 2b, page 796: 107 * 108 * "SO Function Enable must also be ENABLED in order for thiis field 109 * (Render Stream Select) to select a stream for rendering. When SO 110 * Function Enable is DISABLED and Rendering Disable is cleared (i.e., 111 * rendering is enabled), StreamID is ignored downstream of the SO 112 * stage, allowing any stream to be rendered." 113 * 114 * We want Gen7 behavior, but we have to require users to follow Gen7.5 115 * behavior: info->sol_enable must be set for info->render_stream to work. 116 */ 117 118 for (i = 0; i < ARRAY_SIZE(info->streams); i++) { 119 if (!sol_stream_validate_gen7(dev, &info->streams[i])) 120 return false; 121 } 122 123 /* 124 * From the Ivy Bridge PRM, volume 2 part 1, page 208: 125 * 126 * "(Surface Pitch) 127 * [0,2048] Must be 0 or a multiple of 4 Bytes." 128 */ 129 for (i = 0; i < ARRAY_SIZE(info->buffer_strides); i++) { 130 assert(info->buffer_strides[i] <= 2048 && 131 info->buffer_strides[i] % 4 == 0); 132 } 133 134 return true; 135 } 136 137 static bool 138 sol_set_gen7_3DSTATE_STREAMOUT(struct ilo_state_sol *sol, 139 const struct ilo_dev *dev, 140 const struct ilo_state_sol_info *info) 141 { 142 struct { 143 uint8_t offset; 144 uint8_t len; 145 } vue_read[ILO_STATE_SOL_MAX_STREAM_COUNT]; 146 uint8_t i; 147 uint32_t dw1, dw2; 148 149 ILO_DEV_ASSERT(dev, 7, 8); 150 151 if (!sol_validate_gen7(dev, info)) 152 return false; 153 154 for (i = 0; i < ARRAY_SIZE(info->streams); i++) { 155 const struct ilo_state_sol_stream_info *stream = &info->streams[i]; 156 157 vue_read[i].offset = stream->vue_read_base / 2; 158 /* 159 * In pairs minus 1. URB entries are aligned to 512-bits. There is no 160 * need to worry about reading past entries. 161 */ 162 vue_read[i].len = (stream->vue_read_count + 1) / 2; 163 if (vue_read[i].len) 164 vue_read[i].len--; 165 } 166 167 dw1 = info->render_stream << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT | 168 info->tristrip_reorder << GEN7_SO_DW1_REORDER_MODE__SHIFT; 169 170 if (info->sol_enable) 171 dw1 |= GEN7_SO_DW1_SO_ENABLE; 172 173 if (info->render_disable) 174 dw1 |= GEN7_SO_DW1_RENDER_DISABLE; 175 176 if (info->stats_enable) 177 dw1 |= GEN7_SO_DW1_STATISTICS; 178 179 if (ilo_dev_gen(dev) < ILO_GEN(8)) { 180 const uint8_t buffer_enables = ((bool) info->buffer_strides[3]) << 3 | 181 ((bool) info->buffer_strides[2]) << 2 | 182 ((bool) info->buffer_strides[1]) << 1 | 183 ((bool) info->buffer_strides[0]); 184 dw1 |= buffer_enables << GEN7_SO_DW1_BUFFER_ENABLES__SHIFT; 185 } 186 187 dw2 = vue_read[3].offset << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT | 188 vue_read[3].len << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT | 189 vue_read[2].offset << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT | 190 vue_read[2].len << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT | 191 vue_read[1].offset << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT | 192 vue_read[1].len << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT | 193 vue_read[0].offset << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT | 194 vue_read[0].len << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT; 195 196 STATIC_ASSERT(ARRAY_SIZE(sol->streamout) >= 2); 197 sol->streamout[0] = dw1; 198 sol->streamout[1] = dw2; 199 200 memcpy(sol->strides, info->buffer_strides, sizeof(sol->strides)); 201 202 return true; 203 } 204 205 static bool 206 sol_set_gen7_3DSTATE_SO_DECL_LIST(struct ilo_state_sol *sol, 207 const struct ilo_dev *dev, 208 const struct ilo_state_sol_info *info, 209 uint8_t max_decl_count) 210 { 211 uint64_t decl_list[ILO_STATE_SOL_MAX_DECL_COUNT]; 212 uint8_t decl_counts[ILO_STATE_SOL_MAX_STREAM_COUNT]; 213 uint8_t buffer_selects[ILO_STATE_SOL_MAX_STREAM_COUNT]; 214 uint32_t dw1, dw2; 215 uint8_t i, j; 216 217 ILO_DEV_ASSERT(dev, 7, 8); 218 219 memset(decl_list, 0, sizeof(decl_list[0]) * max_decl_count); 220 221 for (i = 0; i < ARRAY_SIZE(info->streams); i++) { 222 const struct ilo_state_sol_stream_info *stream = &info->streams[i]; 223 224 assert(stream->decl_count <= max_decl_count); 225 decl_counts[i] = stream->decl_count; 226 buffer_selects[i] = 0; 227 228 for (j = 0; j < stream->decl_count; j++) { 229 const struct ilo_state_sol_decl_info *decl = &stream->decls[j]; 230 const uint8_t mask = ((1 << decl->component_count) - 1) << 231 decl->component_base; 232 uint16_t val; 233 234 val = decl->buffer << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT | 235 mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT; 236 237 if (decl->is_hole) 238 val |= GEN7_SO_DECL_HOLE_FLAG; 239 else 240 val |= decl->attr << GEN7_SO_DECL_REG_INDEX__SHIFT; 241 242 decl_list[j] |= (uint64_t) val << (16 * i); 243 buffer_selects[i] |= 1 << decl->buffer; 244 } 245 } 246 247 dw1 = buffer_selects[3] << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT | 248 buffer_selects[2] << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT | 249 buffer_selects[1] << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT | 250 buffer_selects[0] << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT; 251 dw2 = decl_counts[3] << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT | 252 decl_counts[2] << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT | 253 decl_counts[1] << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT | 254 decl_counts[0] << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT; 255 256 STATIC_ASSERT(ARRAY_SIZE(sol->so_decl) >= 2); 257 sol->so_decl[0] = dw1; 258 sol->so_decl[1] = dw2; 259 260 STATIC_ASSERT(ARRAY_SIZE(sol->decl[0]) == 2); 261 memcpy(sol->decl, decl_list, sizeof(sol->decl[0]) * max_decl_count); 262 sol->decl_count = max_decl_count; 263 264 return true; 265 } 266 267 static bool 268 sol_buffer_validate_gen7(const struct ilo_dev *dev, 269 const struct ilo_state_sol_buffer_info *info) 270 { 271 ILO_DEV_ASSERT(dev, 7, 8); 272 273 /* 274 * From the Ivy Bridge PRM, volume 2 part 1, page 208: 275 * 276 * "(Surface Base Address) This field specifies the starting DWord 277 * address..." 278 */ 279 assert(info->offset % 4 == 0); 280 281 if (info->vma) { 282 assert(info->vma->vm_alignment % 4 == 0); 283 assert(info->size && info->offset + info->size <= info->vma->vm_size); 284 } 285 286 /* Gen8+ only */ 287 if (info->write_offset_load || info->write_offset_save) { 288 assert(ilo_dev_gen(dev) >= ILO_GEN(8) && info->write_offset_vma); 289 assert(info->write_offset_offset + sizeof(uint32_t) <= 290 info->write_offset_vma->vm_size); 291 } 292 293 /* 294 * From the Broadwell PRM, volume 2b, page 206: 295 * 296 * "This field (Stream Offset) specifies the Offset in stream output 297 * buffer to start at, or whether to append to the end of an existing 298 * buffer. The Offset must be DWORD aligned." 299 */ 300 if (info->write_offset_imm_enable) { 301 assert(info->write_offset_load); 302 assert(info->write_offset_imm % 4 == 0); 303 } 304 305 return true; 306 } 307 308 static uint32_t 309 sol_buffer_get_gen6_size(const struct ilo_dev *dev, 310 const struct ilo_state_sol_buffer_info *info) 311 { 312 ILO_DEV_ASSERT(dev, 6, 8); 313 314 /* 315 * From the Ivy Bridge PRM, volume 2 part 1, page 208: 316 * 317 * "(Surface End Address) This field specifies the ending DWord 318 * address..." 319 */ 320 return (info->vma) ? info->size & ~3 : 0; 321 } 322 323 static bool 324 sol_buffer_set_gen7_3dstate_so_buffer(struct ilo_state_sol_buffer *sb, 325 const struct ilo_dev *dev, 326 const struct ilo_state_sol_buffer_info *info) 327 { 328 const uint32_t size = sol_buffer_get_gen6_size(dev, info); 329 330 ILO_DEV_ASSERT(dev, 7, 7.5); 331 332 if (!sol_buffer_validate_gen7(dev, info)) 333 return false; 334 335 STATIC_ASSERT(ARRAY_SIZE(sb->so_buf) >= 2); 336 sb->so_buf[0] = info->offset; 337 sb->so_buf[1] = (size) ? info->offset + size : 0; 338 339 return true; 340 } 341 342 static bool 343 sol_buffer_set_gen8_3dstate_so_buffer(struct ilo_state_sol_buffer *sb, 344 const struct ilo_dev *dev, 345 const struct ilo_state_sol_buffer_info *info) 346 { 347 const uint32_t size = sol_buffer_get_gen6_size(dev, info); 348 uint32_t dw1; 349 350 ILO_DEV_ASSERT(dev, 8, 8); 351 352 if (!sol_buffer_validate_gen7(dev, info)) 353 return false; 354 355 dw1 = 0; 356 357 if (info->vma) 358 dw1 |= GEN8_SO_BUF_DW1_ENABLE; 359 if (info->write_offset_load) 360 dw1 |= GEN8_SO_BUF_DW1_OFFSET_WRITE_ENABLE; 361 if (info->write_offset_save) 362 dw1 |= GEN8_SO_BUF_DW1_OFFSET_ENABLE; 363 364 STATIC_ASSERT(ARRAY_SIZE(sb->so_buf) >= 4); 365 sb->so_buf[0] = dw1; 366 sb->so_buf[1] = info->offset; 367 368 /* 369 * From the Broadwell PRM, volume 2b, page 205: 370 * 371 * "This field (Surface Size) specifies the size of buffer in number 372 * DWords minus 1 of the buffer in Graphics Memory." 373 */ 374 sb->so_buf[2] = (size) ? size / 4 - 1 : 0; 375 376 /* load from imm or sb->write_offset_bo */ 377 sb->so_buf[3] = (info->write_offset_imm_enable) ? 378 info->write_offset_imm : ~0u; 379 380 return true; 381 } 382 383 bool 384 ilo_state_sol_init(struct ilo_state_sol *sol, 385 const struct ilo_dev *dev, 386 const struct ilo_state_sol_info *info) 387 { 388 bool ret = true; 389 390 assert(ilo_is_zeroed(sol, sizeof(*sol))); 391 assert(ilo_is_zeroed(info->data, info->data_size)); 392 393 if (ilo_dev_gen(dev) >= ILO_GEN(7)) { 394 uint8_t max_decl_count, i; 395 396 max_decl_count = info->streams[0].decl_count; 397 for (i = 1; i < ARRAY_SIZE(info->streams); i++) { 398 if (max_decl_count < info->streams[i].decl_count) 399 max_decl_count = info->streams[i].decl_count; 400 } 401 402 assert(ilo_state_sol_data_size(dev, max_decl_count) <= info->data_size); 403 sol->decl = (uint32_t (*)[2]) info->data; 404 405 ret &= sol_set_gen7_3DSTATE_STREAMOUT(sol, dev, info); 406 ret &= sol_set_gen7_3DSTATE_SO_DECL_LIST(sol, dev, info, max_decl_count); 407 } 408 409 assert(ret); 410 411 return ret; 412 } 413 414 bool 415 ilo_state_sol_init_disabled(struct ilo_state_sol *sol, 416 const struct ilo_dev *dev, 417 bool render_disable) 418 { 419 struct ilo_state_sol_info info; 420 421 memset(&info, 0, sizeof(info)); 422 info.render_disable = render_disable; 423 424 return ilo_state_sol_init(sol, dev, &info); 425 } 426 427 uint32_t 428 ilo_state_sol_buffer_size(const struct ilo_dev *dev, uint32_t size, 429 uint32_t *alignment) 430 { 431 /* DWord aligned without padding */ 432 *alignment = 4; 433 return size; 434 } 435 436 bool 437 ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb, 438 const struct ilo_dev *dev, 439 const struct ilo_state_sol_buffer_info *info) 440 { 441 bool ret = true; 442 443 assert(ilo_is_zeroed(sb, sizeof(*sb))); 444 445 if (ilo_dev_gen(dev) >= ILO_GEN(8)) 446 ret &= sol_buffer_set_gen8_3dstate_so_buffer(sb, dev, info); 447 else 448 ret &= sol_buffer_set_gen7_3dstate_so_buffer(sb, dev, info); 449 450 sb->vma = info->vma; 451 sb->write_offset_vma = info->write_offset_vma; 452 453 assert(ret); 454 455 return ret; 456 } 457 458 bool 459 ilo_state_sol_buffer_init_disabled(struct ilo_state_sol_buffer *sb, 460 const struct ilo_dev *dev) 461 { 462 struct ilo_state_sol_buffer_info info; 463 464 memset(&info, 0, sizeof(info)); 465 466 return ilo_state_sol_buffer_init(sb, dev, &info); 467 } 468