1 /* 2 * Copyright (c) 2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 /** 25 * \file lower_buffer_access.cpp 26 * 27 * Helper for IR lowering pass to replace dereferences of buffer object based 28 * shader variables with intrinsic function calls. 29 * 30 * This helper is used by lowering passes for UBOs, SSBOs and compute shader 31 * shared variables. 32 */ 33 34 #include "lower_buffer_access.h" 35 #include "ir_builder.h" 36 #include "main/macros.h" 37 #include "util/list.h" 38 #include "glsl_parser_extras.h" 39 40 using namespace ir_builder; 41 42 namespace lower_buffer_access { 43 44 static inline int 45 writemask_for_size(unsigned n) 46 { 47 return ((1 << n) - 1); 48 } 49 50 /** 51 * Takes a deref and recursively calls itself to break the deref down to the 52 * point that the reads or writes generated are contiguous scalars or vectors. 53 */ 54 void 55 lower_buffer_access::emit_access(void *mem_ctx, 56 bool is_write, 57 ir_dereference *deref, 58 ir_variable *base_offset, 59 unsigned int deref_offset, 60 bool row_major, 61 int matrix_columns, 62 unsigned int packing, 63 unsigned int write_mask) 64 { 65 if (deref->type->is_record()) { 66 unsigned int field_offset = 0; 67 68 for (unsigned i = 0; i < deref->type->length; i++) { 69 const struct glsl_struct_field *field = 70 &deref->type->fields.structure[i]; 71 ir_dereference *field_deref = 72 new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL), 73 field->name); 74 75 field_offset = 76 glsl_align(field_offset, 77 field->type->std140_base_alignment(row_major)); 78 79 emit_access(mem_ctx, is_write, field_deref, base_offset, 80 deref_offset + field_offset, 81 row_major, 1, packing, 82 writemask_for_size(field_deref->type->vector_elements)); 83 84 field_offset += field->type->std140_size(row_major); 85 } 86 return; 87 } 88 89 if (deref->type->is_array()) { 90 unsigned array_stride = packing == GLSL_INTERFACE_PACKING_STD430 ? 91 deref->type->fields.array->std430_array_stride(row_major) : 92 glsl_align(deref->type->fields.array->std140_size(row_major), 16); 93 94 for (unsigned i = 0; i < deref->type->length; i++) { 95 ir_constant *element = new(mem_ctx) ir_constant(i); 96 ir_dereference *element_deref = 97 new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), 98 element); 99 emit_access(mem_ctx, is_write, element_deref, base_offset, 100 deref_offset + i * array_stride, 101 row_major, 1, packing, 102 writemask_for_size(element_deref->type->vector_elements)); 103 } 104 return; 105 } 106 107 if (deref->type->is_matrix()) { 108 for (unsigned i = 0; i < deref->type->matrix_columns; i++) { 109 ir_constant *col = new(mem_ctx) ir_constant(i); 110 ir_dereference *col_deref = 111 new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), col); 112 113 if (row_major) { 114 /* For a row-major matrix, the next column starts at the next 115 * element. 116 */ 117 int size_mul = deref->type->is_64bit() ? 8 : 4; 118 emit_access(mem_ctx, is_write, col_deref, base_offset, 119 deref_offset + i * size_mul, 120 row_major, deref->type->matrix_columns, packing, 121 writemask_for_size(col_deref->type->vector_elements)); 122 } else { 123 int size_mul; 124 125 /* std430 doesn't round up vec2 size to a vec4 size */ 126 if (packing == GLSL_INTERFACE_PACKING_STD430 && 127 deref->type->vector_elements == 2 && 128 !deref->type->is_64bit()) { 129 size_mul = 8; 130 } else { 131 /* std140 always rounds the stride of arrays (and matrices) to a 132 * vec4, so matrices are always 16 between columns/rows. With 133 * doubles, they will be 32 apart when there are more than 2 rows. 134 * 135 * For both std140 and std430, if the member is a 136 * three-'component vector with components consuming N basic 137 * machine units, the base alignment is 4N. For vec4, base 138 * alignment is 4N. 139 */ 140 size_mul = (deref->type->is_64bit() && 141 deref->type->vector_elements > 2) ? 32 : 16; 142 } 143 144 emit_access(mem_ctx, is_write, col_deref, base_offset, 145 deref_offset + i * size_mul, 146 row_major, deref->type->matrix_columns, packing, 147 writemask_for_size(col_deref->type->vector_elements)); 148 } 149 } 150 return; 151 } 152 153 assert(deref->type->is_scalar() || deref->type->is_vector()); 154 155 if (!row_major) { 156 ir_rvalue *offset = 157 add(base_offset, new(mem_ctx) ir_constant(deref_offset)); 158 unsigned mask = 159 is_write ? write_mask : (1 << deref->type->vector_elements) - 1; 160 insert_buffer_access(mem_ctx, deref, deref->type, offset, mask, -1); 161 } else { 162 unsigned N = deref->type->is_64bit() ? 8 : 4; 163 164 /* We're dereffing a column out of a row-major matrix, so we 165 * gather the vector from each stored row. 166 */ 167 assert(deref->type->base_type == GLSL_TYPE_FLOAT || 168 deref->type->base_type == GLSL_TYPE_DOUBLE); 169 /* Matrices, row_major or not, are stored as if they were 170 * arrays of vectors of the appropriate size in std140. 171 * Arrays have their strides rounded up to a vec4, so the 172 * matrix stride is always 16. However a double matrix may either be 16 173 * or 32 depending on the number of columns. 174 */ 175 assert(matrix_columns <= 4); 176 unsigned matrix_stride = 0; 177 /* Matrix stride for std430 mat2xY matrices are not rounded up to 178 * vec4 size. From OpenGL 4.3 spec, section 7.6.2.2 "Standard Uniform 179 * Block Layout": 180 * 181 * "2. If the member is a two- or four-component vector with components 182 * consuming N basic machine units, the base alignment is 2N or 4N, 183 * respectively." [...] 184 * "4. If the member is an array of scalars or vectors, the base alignment 185 * and array stride are set to match the base alignment of a single array 186 * element, according to rules (1), (2), and (3), and rounded up to the 187 * base alignment of a vec4." [...] 188 * "7. If the member is a row-major matrix with C columns and R rows, the 189 * matrix is stored identically to an array of R row vectors with C 190 * components each, according to rule (4)." [...] 191 * "When using the std430 storage layout, shader storage blocks will be 192 * laid out in buffer storage identically to uniform and shader storage 193 * blocks using the std140 layout, except that the base alignment and 194 * stride of arrays of scalars and vectors in rule 4 and of structures in 195 * rule 9 are not rounded up a multiple of the base alignment of a vec4." 196 */ 197 if (packing == GLSL_INTERFACE_PACKING_STD430 && matrix_columns == 2) 198 matrix_stride = 2 * N; 199 else 200 matrix_stride = glsl_align(matrix_columns * N, 16); 201 202 const glsl_type *deref_type = deref->type->base_type == GLSL_TYPE_FLOAT ? 203 glsl_type::float_type : glsl_type::double_type; 204 205 for (unsigned i = 0; i < deref->type->vector_elements; i++) { 206 ir_rvalue *chan_offset = 207 add(base_offset, 208 new(mem_ctx) ir_constant(deref_offset + i * matrix_stride)); 209 if (!is_write || ((1U << i) & write_mask)) 210 insert_buffer_access(mem_ctx, deref, deref_type, chan_offset, 211 (1U << i), i); 212 } 213 } 214 } 215 216 /** 217 * Determine if a thing being dereferenced is row-major 218 * 219 * There is some trickery here. 220 * 221 * If the thing being dereferenced is a member of uniform block \b without an 222 * instance name, then the name of the \c ir_variable is the field name of an 223 * interface type. If this field is row-major, then the thing referenced is 224 * row-major. 225 * 226 * If the thing being dereferenced is a member of uniform block \b with an 227 * instance name, then the last dereference in the tree will be an 228 * \c ir_dereference_record. If that record field is row-major, then the 229 * thing referenced is row-major. 230 */ 231 bool 232 lower_buffer_access::is_dereferenced_thing_row_major(const ir_rvalue *deref) 233 { 234 bool matrix = false; 235 const ir_rvalue *ir = deref; 236 237 while (true) { 238 matrix = matrix || ir->type->without_array()->is_matrix(); 239 240 switch (ir->ir_type) { 241 case ir_type_dereference_array: { 242 const ir_dereference_array *const array_deref = 243 (const ir_dereference_array *) ir; 244 245 ir = array_deref->array; 246 break; 247 } 248 249 case ir_type_dereference_record: { 250 const ir_dereference_record *const record_deref = 251 (const ir_dereference_record *) ir; 252 253 ir = record_deref->record; 254 255 const int idx = ir->type->field_index(record_deref->field); 256 assert(idx >= 0); 257 258 const enum glsl_matrix_layout matrix_layout = 259 glsl_matrix_layout(ir->type->fields.structure[idx].matrix_layout); 260 261 switch (matrix_layout) { 262 case GLSL_MATRIX_LAYOUT_INHERITED: 263 break; 264 case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR: 265 return false; 266 case GLSL_MATRIX_LAYOUT_ROW_MAJOR: 267 return matrix || deref->type->without_array()->is_record(); 268 } 269 270 break; 271 } 272 273 case ir_type_dereference_variable: { 274 const ir_dereference_variable *const var_deref = 275 (const ir_dereference_variable *) ir; 276 277 const enum glsl_matrix_layout matrix_layout = 278 glsl_matrix_layout(var_deref->var->data.matrix_layout); 279 280 switch (matrix_layout) { 281 case GLSL_MATRIX_LAYOUT_INHERITED: { 282 /* For interface block matrix variables we handle inherited 283 * layouts at HIR generation time, but we don't do that for shared 284 * variables, which are always column-major 285 */ 286 MAYBE_UNUSED ir_variable *var = deref->variable_referenced(); 287 assert((var->is_in_buffer_block() && !matrix) || 288 var->data.mode == ir_var_shader_shared); 289 return false; 290 } 291 case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR: 292 return false; 293 case GLSL_MATRIX_LAYOUT_ROW_MAJOR: 294 return matrix || deref->type->without_array()->is_record(); 295 } 296 297 unreachable("invalid matrix layout"); 298 break; 299 } 300 301 default: 302 return false; 303 } 304 } 305 306 /* The tree must have ended with a dereference that wasn't an 307 * ir_dereference_variable. That is invalid, and it should be impossible. 308 */ 309 unreachable("invalid dereference tree"); 310 return false; 311 } 312 313 /** 314 * This function initializes various values that will be used later by 315 * emit_access when actually emitting loads or stores. 316 * 317 * Note: const_offset is an input as well as an output, clients must 318 * initialize it to the offset of the variable in the underlying block, and 319 * this function will adjust it by adding the constant offset of the member 320 * being accessed into that variable. 321 */ 322 void 323 lower_buffer_access::setup_buffer_access(void *mem_ctx, 324 ir_rvalue *deref, 325 ir_rvalue **offset, 326 unsigned *const_offset, 327 bool *row_major, 328 int *matrix_columns, 329 const glsl_struct_field **struct_field, 330 enum glsl_interface_packing packing) 331 { 332 *offset = new(mem_ctx) ir_constant(0u); 333 *row_major = is_dereferenced_thing_row_major(deref); 334 *matrix_columns = 1; 335 336 /* Calculate the offset to the start of the region of the UBO 337 * dereferenced by *rvalue. This may be a variable offset if an 338 * array dereference has a variable index. 339 */ 340 while (deref) { 341 switch (deref->ir_type) { 342 case ir_type_dereference_variable: { 343 deref = NULL; 344 break; 345 } 346 347 case ir_type_dereference_array: { 348 ir_dereference_array *deref_array = (ir_dereference_array *) deref; 349 unsigned array_stride; 350 if (deref_array->array->type->is_vector()) { 351 /* We get this when storing or loading a component out of a vector 352 * with a non-constant index. This happens for v[i] = f where v is 353 * a vector (or m[i][j] = f where m is a matrix). If we don't 354 * lower that here, it gets turned into v = vector_insert(v, i, 355 * f), which loads the entire vector, modifies one component and 356 * then write the entire thing back. That breaks if another 357 * thread or SIMD channel is modifying the same vector. 358 */ 359 array_stride = 4; 360 if (deref_array->array->type->is_64bit()) 361 array_stride *= 2; 362 } else if (deref_array->array->type->is_matrix() && *row_major) { 363 /* When loading a vector out of a row major matrix, the 364 * step between the columns (vectors) is the size of a 365 * float, while the step between the rows (elements of a 366 * vector) is handled below in emit_ubo_loads. 367 */ 368 array_stride = 4; 369 if (deref_array->array->type->is_64bit()) 370 array_stride *= 2; 371 *matrix_columns = deref_array->array->type->matrix_columns; 372 } else if (deref_array->type->without_array()->is_interface()) { 373 /* We're processing an array dereference of an interface instance 374 * array. The thing being dereferenced *must* be a variable 375 * dereference because interfaces cannot be embedded in other 376 * types. In terms of calculating the offsets for the lowering 377 * pass, we don't care about the array index. All elements of an 378 * interface instance array will have the same offsets relative to 379 * the base of the block that backs them. 380 */ 381 deref = deref_array->array->as_dereference(); 382 break; 383 } else { 384 /* Whether or not the field is row-major (because it might be a 385 * bvec2 or something) does not affect the array itself. We need 386 * to know whether an array element in its entirety is row-major. 387 */ 388 const bool array_row_major = 389 is_dereferenced_thing_row_major(deref_array); 390 391 /* The array type will give the correct interface packing 392 * information 393 */ 394 if (packing == GLSL_INTERFACE_PACKING_STD430) { 395 array_stride = deref_array->type->std430_array_stride(array_row_major); 396 } else { 397 array_stride = deref_array->type->std140_size(array_row_major); 398 array_stride = glsl_align(array_stride, 16); 399 } 400 } 401 402 ir_rvalue *array_index = deref_array->array_index; 403 if (array_index->type->base_type == GLSL_TYPE_INT) 404 array_index = i2u(array_index); 405 406 ir_constant *const_index = 407 array_index->constant_expression_value(NULL); 408 if (const_index) { 409 *const_offset += array_stride * const_index->value.u[0]; 410 } else { 411 *offset = add(*offset, 412 mul(array_index, 413 new(mem_ctx) ir_constant(array_stride))); 414 } 415 deref = deref_array->array->as_dereference(); 416 break; 417 } 418 419 case ir_type_dereference_record: { 420 ir_dereference_record *deref_record = (ir_dereference_record *) deref; 421 const glsl_type *struct_type = deref_record->record->type; 422 unsigned intra_struct_offset = 0; 423 424 for (unsigned int i = 0; i < struct_type->length; i++) { 425 const glsl_type *type = struct_type->fields.structure[i].type; 426 427 ir_dereference_record *field_deref = new(mem_ctx) 428 ir_dereference_record(deref_record->record, 429 struct_type->fields.structure[i].name); 430 const bool field_row_major = 431 is_dereferenced_thing_row_major(field_deref); 432 433 ralloc_free(field_deref); 434 435 unsigned field_align = 0; 436 437 if (packing == GLSL_INTERFACE_PACKING_STD430) 438 field_align = type->std430_base_alignment(field_row_major); 439 else 440 field_align = type->std140_base_alignment(field_row_major); 441 442 if (struct_type->fields.structure[i].offset != -1) { 443 intra_struct_offset = struct_type->fields.structure[i].offset; 444 } 445 446 intra_struct_offset = glsl_align(intra_struct_offset, field_align); 447 448 if (strcmp(struct_type->fields.structure[i].name, 449 deref_record->field) == 0) { 450 if (struct_field) 451 *struct_field = &struct_type->fields.structure[i]; 452 break; 453 } 454 455 if (packing == GLSL_INTERFACE_PACKING_STD430) 456 intra_struct_offset += type->std430_size(field_row_major); 457 else 458 intra_struct_offset += type->std140_size(field_row_major); 459 460 /* If the field just examined was itself a structure, apply rule 461 * #9: 462 * 463 * "The structure may have padding at the end; the base offset 464 * of the member following the sub-structure is rounded up to 465 * the next multiple of the base alignment of the structure." 466 */ 467 if (type->without_array()->is_record()) { 468 intra_struct_offset = glsl_align(intra_struct_offset, 469 field_align); 470 471 } 472 } 473 474 *const_offset += intra_struct_offset; 475 deref = deref_record->record->as_dereference(); 476 break; 477 } 478 479 case ir_type_swizzle: { 480 ir_swizzle *deref_swizzle = (ir_swizzle *) deref; 481 482 assert(deref_swizzle->mask.num_components == 1); 483 484 *const_offset += deref_swizzle->mask.x * sizeof(int); 485 deref = deref_swizzle->val->as_dereference(); 486 break; 487 } 488 489 default: 490 assert(!"not reached"); 491 deref = NULL; 492 break; 493 } 494 } 495 } 496 497 } /* namespace lower_buffer_access */ 498