1 /* 2 * Copyright 2013 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 /** 25 * \file opt_vectorize.cpp 26 * 27 * Combines scalar assignments of the same expression (modulo swizzle) to 28 * multiple channels of the same variable into a single vectorized expression 29 * and assignment. 30 * 31 * Many generated shaders contain scalarized code. That is, they contain 32 * 33 * r1.x = log2(v0.x); 34 * r1.y = log2(v0.y); 35 * r1.z = log2(v0.z); 36 * 37 * rather than 38 * 39 * r1.xyz = log2(v0.xyz); 40 * 41 * We look for consecutive assignments of the same expression (modulo swizzle) 42 * to each channel of the same variable. 43 * 44 * For instance, we want to convert these three scalar operations 45 * 46 * (assign (x) (var_ref r1) (expression float log2 (swiz x (var_ref v0)))) 47 * (assign (y) (var_ref r1) (expression float log2 (swiz y (var_ref v0)))) 48 * (assign (z) (var_ref r1) (expression float log2 (swiz z (var_ref v0)))) 49 * 50 * into a single vector operation 51 * 52 * (assign (xyz) (var_ref r1) (expression vec3 log2 (swiz xyz (var_ref v0)))) 53 */ 54 55 #include "ir.h" 56 #include "ir_visitor.h" 57 #include "ir_optimization.h" 58 #include "compiler/glsl_types.h" 59 #include "program/prog_instruction.h" 60 61 namespace { 62 63 class ir_vectorize_visitor : public ir_hierarchical_visitor { 64 public: 65 void clear() 66 { 67 assignment[0] = NULL; 68 assignment[1] = NULL; 69 assignment[2] = NULL; 70 assignment[3] = NULL; 71 current_assignment = NULL; 72 last_assignment = NULL; 73 channels = 0; 74 has_swizzle = false; 75 } 76 77 ir_vectorize_visitor() 78 { 79 clear(); 80 progress = false; 81 } 82 83 virtual ir_visitor_status visit_enter(ir_assignment *); 84 virtual ir_visitor_status visit_enter(ir_swizzle *); 85 virtual ir_visitor_status visit_enter(ir_dereference_array *); 86 virtual ir_visitor_status visit_enter(ir_expression *); 87 virtual ir_visitor_status visit_enter(ir_if *); 88 virtual ir_visitor_status visit_enter(ir_loop *); 89 virtual ir_visitor_status visit_enter(ir_texture *); 90 91 virtual ir_visitor_status visit_leave(ir_assignment *); 92 93 void try_vectorize(); 94 95 ir_assignment *assignment[4]; 96 ir_assignment *current_assignment, *last_assignment; 97 unsigned channels; 98 bool has_swizzle; 99 100 bool progress; 101 }; 102 103 } /* unnamed namespace */ 104 105 /** 106 * Rewrites the swizzles and types of a right-hand side of an assignment. 107 * 108 * From the example above, this function would be called (by visit_tree()) on 109 * the nodes of the tree (expression float log2 (swiz z (var_ref v0))), 110 * rewriting it into (expression vec3 log2 (swiz xyz (var_ref v0))). 111 * 112 * The function operates on ir_expressions (and its operands) and ir_swizzles. 113 * For expressions it sets a new type and swizzles any non-expression and non- 114 * swizzle scalar operands into appropriately sized vector arguments. For 115 * example, if combining 116 * 117 * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0) (var_ref v1)))) 118 * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0) (var_ref v1)))) 119 * 120 * where v1 is a scalar, rewrite_swizzle() would insert a swizzle on 121 * (var_ref v1) such that the final result was 122 * 123 * (assign (xy) (var_ref r1) (expression vec2 + (swiz xy (var_ref v0)) 124 * (swiz xx (var_ref v1)))) 125 * 126 * For swizzles, it sets a new type, and if the variable being swizzled is a 127 * vector it overwrites the swizzle mask with the ir_swizzle_mask passed as the 128 * data parameter. If the swizzled variable is scalar, then the swizzle was 129 * added by an earlier call to rewrite_swizzle() on an expression, so the 130 * mask should not be modified. 131 */ 132 static void 133 rewrite_swizzle(ir_instruction *ir, void *data) 134 { 135 ir_swizzle_mask *mask = (ir_swizzle_mask *)data; 136 137 switch (ir->ir_type) { 138 case ir_type_swizzle: { 139 ir_swizzle *swz = (ir_swizzle *)ir; 140 if (swz->val->type->is_vector()) { 141 swz->mask = *mask; 142 } 143 swz->type = glsl_type::get_instance(swz->type->base_type, 144 mask->num_components, 1); 145 break; 146 } 147 case ir_type_expression: { 148 ir_expression *expr = (ir_expression *)ir; 149 expr->type = glsl_type::get_instance(expr->type->base_type, 150 mask->num_components, 1); 151 for (unsigned i = 0; i < 4; i++) { 152 if (expr->operands[i]) { 153 ir_rvalue *rval = expr->operands[i]->as_rvalue(); 154 if (rval && rval->type->is_scalar() && 155 !rval->as_expression() && !rval->as_swizzle()) { 156 expr->operands[i] = new(ir) ir_swizzle(rval, 0, 0, 0, 0, 157 mask->num_components); 158 } 159 } 160 } 161 break; 162 } 163 default: 164 break; 165 } 166 } 167 168 /** 169 * Attempt to vectorize the previously saved assignments, and clear them from 170 * consideration. 171 * 172 * If the assignments are able to be combined, it modifies in-place the last 173 * assignment seen to be an equivalent vector form of the scalar assignments. 174 * It then removes the other now obsolete scalar assignments. 175 */ 176 void 177 ir_vectorize_visitor::try_vectorize() 178 { 179 if (this->last_assignment && this->channels > 1) { 180 ir_swizzle_mask mask = {0, 0, 0, 0, channels, 0}; 181 182 this->last_assignment->write_mask = 0; 183 184 for (unsigned i = 0, j = 0; i < 4; i++) { 185 if (this->assignment[i]) { 186 this->last_assignment->write_mask |= 1 << i; 187 188 if (this->assignment[i] != this->last_assignment) { 189 this->assignment[i]->remove(); 190 } 191 192 switch (j) { 193 case 0: mask.x = i; break; 194 case 1: mask.y = i; break; 195 case 2: mask.z = i; break; 196 case 3: mask.w = i; break; 197 } 198 199 j++; 200 } 201 } 202 203 visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask); 204 205 this->progress = true; 206 } 207 clear(); 208 } 209 210 /** 211 * Returns whether the write mask is a single channel. 212 */ 213 static bool 214 single_channel_write_mask(unsigned write_mask) 215 { 216 return write_mask != 0 && (write_mask & (write_mask - 1)) == 0; 217 } 218 219 /** 220 * Translates single-channeled write mask to single-channeled swizzle. 221 */ 222 static unsigned 223 write_mask_to_swizzle(unsigned write_mask) 224 { 225 switch (write_mask) { 226 case WRITEMASK_X: return SWIZZLE_X; 227 case WRITEMASK_Y: return SWIZZLE_Y; 228 case WRITEMASK_Z: return SWIZZLE_Z; 229 case WRITEMASK_W: return SWIZZLE_W; 230 } 231 unreachable("not reached"); 232 } 233 234 /** 235 * Returns whether a single-channeled write mask matches a swizzle. 236 */ 237 static bool 238 write_mask_matches_swizzle(unsigned write_mask, 239 const ir_swizzle *swz) 240 { 241 return ((write_mask == WRITEMASK_X && swz->mask.x == SWIZZLE_X) || 242 (write_mask == WRITEMASK_Y && swz->mask.x == SWIZZLE_Y) || 243 (write_mask == WRITEMASK_Z && swz->mask.x == SWIZZLE_Z) || 244 (write_mask == WRITEMASK_W && swz->mask.x == SWIZZLE_W)); 245 } 246 247 /** 248 * Upon entering an ir_assignment, attempt to vectorize the currently tracked 249 * assignments if the current assignment is not suitable. Keep a pointer to 250 * the current assignment. 251 */ 252 ir_visitor_status 253 ir_vectorize_visitor::visit_enter(ir_assignment *ir) 254 { 255 ir_dereference *lhs = this->last_assignment != NULL ? 256 this->last_assignment->lhs : NULL; 257 ir_rvalue *rhs = this->last_assignment != NULL ? 258 this->last_assignment->rhs : NULL; 259 260 if (ir->condition || 261 this->channels >= 4 || 262 !single_channel_write_mask(ir->write_mask) || 263 this->assignment[write_mask_to_swizzle(ir->write_mask)] != NULL || 264 (lhs && !ir->lhs->equals(lhs)) || 265 (rhs && !ir->rhs->equals(rhs, ir_type_swizzle))) { 266 try_vectorize(); 267 } 268 269 this->current_assignment = ir; 270 271 return visit_continue; 272 } 273 274 /** 275 * Upon entering an ir_swizzle, set ::has_swizzle if we're visiting from an 276 * ir_assignment (i.e., that ::current_assignment is set) and the swizzle mask 277 * matches the current assignment's write mask. 278 * 279 * If the write mask doesn't match the swizzle mask, remove the current 280 * assignment from further consideration. 281 */ 282 ir_visitor_status 283 ir_vectorize_visitor::visit_enter(ir_swizzle *ir) 284 { 285 if (this->current_assignment) { 286 if (write_mask_matches_swizzle(this->current_assignment->write_mask, ir)) { 287 this->has_swizzle = true; 288 } else { 289 this->current_assignment = NULL; 290 } 291 } 292 return visit_continue; 293 } 294 295 /* Upon entering an ir_array_dereference, remove the current assignment from 296 * further consideration. Since the index of an array dereference must scalar, 297 * we are not able to vectorize it. 298 * 299 * FINISHME: If all of scalar indices are identical we could vectorize. 300 */ 301 ir_visitor_status 302 ir_vectorize_visitor::visit_enter(ir_dereference_array *) 303 { 304 this->current_assignment = NULL; 305 return visit_continue_with_parent; 306 } 307 308 /** 309 * Upon entering an ir_expression, remove the current assignment from further 310 * consideration if the expression operates horizontally on vectors. 311 */ 312 ir_visitor_status 313 ir_vectorize_visitor::visit_enter(ir_expression *ir) 314 { 315 if (ir->is_horizontal()) { 316 this->current_assignment = NULL; 317 return visit_continue_with_parent; 318 } 319 return visit_continue; 320 } 321 322 /* Since there is no statement to visit between the "then" and "else" 323 * instructions try to vectorize before, in between, and after them to avoid 324 * combining statements from different basic blocks. 325 */ 326 ir_visitor_status 327 ir_vectorize_visitor::visit_enter(ir_if *ir) 328 { 329 try_vectorize(); 330 331 visit_list_elements(this, &ir->then_instructions); 332 try_vectorize(); 333 334 visit_list_elements(this, &ir->else_instructions); 335 try_vectorize(); 336 337 return visit_continue_with_parent; 338 } 339 340 /* Since there is no statement to visit between the instructions in the body of 341 * the loop and the instructions after it try to vectorize before and after the 342 * body to avoid combining statements from different basic blocks. 343 */ 344 ir_visitor_status 345 ir_vectorize_visitor::visit_enter(ir_loop *ir) 346 { 347 try_vectorize(); 348 349 visit_list_elements(this, &ir->body_instructions); 350 try_vectorize(); 351 352 return visit_continue_with_parent; 353 } 354 355 /** 356 * Upon entering an ir_texture, remove the current assignment from 357 * further consideration. Vectorizing multiple texture lookups into one 358 * is wrong. 359 */ 360 ir_visitor_status 361 ir_vectorize_visitor::visit_enter(ir_texture *) 362 { 363 this->current_assignment = NULL; 364 return visit_continue_with_parent; 365 } 366 367 /** 368 * Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if 369 * the swizzle mask(s) found were appropriate. Also save a pointer in 370 * ::last_assignment so that we can compare future assignments with it. 371 * 372 * Finally, clear ::current_assignment and ::has_swizzle. 373 */ 374 ir_visitor_status 375 ir_vectorize_visitor::visit_leave(ir_assignment *ir) 376 { 377 if (this->has_swizzle && this->current_assignment) { 378 assert(this->current_assignment == ir); 379 380 unsigned channel = write_mask_to_swizzle(this->current_assignment->write_mask); 381 this->assignment[channel] = ir; 382 this->channels++; 383 384 this->last_assignment = this->current_assignment; 385 } 386 this->current_assignment = NULL; 387 this->has_swizzle = false; 388 return visit_continue; 389 } 390 391 /** 392 * Combines scalar assignments of the same expression (modulo swizzle) to 393 * multiple channels of the same variable into a single vectorized expression 394 * and assignment. 395 */ 396 bool 397 do_vectorize(exec_list *instructions) 398 { 399 ir_vectorize_visitor v; 400 401 v.run(instructions); 402 403 /* Try to vectorize the last assignments seen. */ 404 v.try_vectorize(); 405 406 return v.progress; 407 } 408