/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "instruction_simplifier_shared.h"

#include "mirror/array-inl.h"

namespace art {

namespace {

// Helper for TryCombineMultiplyAccumulate: try to fold a multiplication whose
// other operand is an Add/Sub with a +/-1 constant into a single
// HMultiplyAccumulate. Patterns handled:
//   a * (b + 1)     ->  (a * b) + a
//   a * (b - (-1))  ->  (a * b) + a
//   a * (1 - b)     ->  a - (a * b)
// `input_binop` is the Add/Sub input of `mul`, `input_other` is the other
// input ("a" above). Returns true (and replaces `mul`, removes `input_binop`)
// on success.
bool TrySimpleMultiplyAccumulatePatterns(HMul* mul,
                                         HBinaryOperation* input_binop,
                                         HInstruction* input_other) {
  DCHECK(Primitive::IsIntOrLongType(mul->GetType()));
  DCHECK(input_binop->IsAdd() || input_binop->IsSub());
  DCHECK_NE(input_binop, input_other);
  // The Add/Sub is removed below, so it must have no other (non-environment)
  // users, otherwise the transformation would not pay off.
  if (!input_binop->HasOnlyOneNonEnvironmentUse()) {
    return false;
  }

  // Try to interpret patterns like
  //    a * (b <+/-> 1)
  // as
  //    (a * b) <+/-> a
  HInstruction* input_a = input_other;
  HInstruction* input_b = nullptr;  // Set to a non-null value if we found a pattern to optimize.
  HInstruction::InstructionKind op_kind;  // Only read when input_b != nullptr.

  if (input_binop->IsAdd()) {
    if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) {
      // Interpret
      //    a * (b + 1)
      // as
      //    (a * b) + a
      input_b = input_binop->GetLeastConstantLeft();
      op_kind = HInstruction::kAdd;
    }
  } else {
    DCHECK(input_binop->IsSub());
    if (input_binop->GetRight()->IsConstant() &&
        input_binop->GetRight()->AsConstant()->IsMinusOne()) {
      // Interpret
      //    a * (b - (-1))
      // as
      //    a + (a * b)
      input_b = input_binop->GetLeft();
      op_kind = HInstruction::kAdd;
    } else if (input_binop->GetLeft()->IsConstant() &&
               input_binop->GetLeft()->AsConstant()->IsOne()) {
      // Interpret
      //    a * (1 - b)
      // as
      //    a - (a * b)
      input_b = input_binop->GetRight();
      op_kind = HInstruction::kSub;
    }
  }

  if (input_b == nullptr) {
    // We did not find a pattern we can optimize.
    return false;
  }

  ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();
  // Note: `input_a` is passed twice — it is both the accumulator and one of
  // the multiplicands, matching the "(a * b) <+/-> a" shape above.
  HMultiplyAccumulate* mulacc = new(arena) HMultiplyAccumulate(
      mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc());

  mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc);
  // The Add/Sub had its single use folded away; it is now dead and removable.
  input_binop->GetBlock()->RemoveInstruction(input_binop);

  return true;
}

}  // namespace

// Try to merge an HMul with its single Add/Sub/Neg user (or with a +/-1
// Add/Sub input) into an HMultiplyAccumulate, for ISAs with multiply-accumulate
// instructions (e.g. ARM MLA/MLS). Returns true if the graph was rewritten.
// ARM/Thumb2 only handle 32-bit ints; ARM64 handles int and long.
bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa) {
  Primitive::Type type = mul->GetType();
  switch (isa) {
    case kArm:
    case kThumb2:
      if (type != Primitive::kPrimInt) {
        return false;
      }
      break;
    case kArm64:
      if (!Primitive::IsIntOrLongType(type)) {
        return false;
      }
      break;
    default:
      return false;
  }

  ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();

  if (mul->HasOnlyOneNonEnvironmentUse()) {
    HInstruction* use = mul->GetUses().front().GetUser();
    if (use->IsAdd() || use->IsSub()) {
      // Replace code looking like
      //    MUL tmp, x, y
      //    SUB dst, acc, tmp
      // with
      //    MULSUB dst, acc, x, y
      // Note that we do not want to (unconditionally) perform the merge when the
      // multiplication has multiple uses and it can be merged in all of them.
      // Multiple uses could happen on the same control-flow path, and we would
      // then increase the amount of work. In the future we could try to evaluate
      // whether all uses are on different control-flow paths (using dominance and
      // reverse-dominance information) and only perform the merge when they are.
      HInstruction* accumulator = nullptr;
      HBinaryOperation* binop = use->AsBinaryOperation();
      HInstruction* binop_left = binop->GetLeft();
      HInstruction* binop_right = binop->GetRight();
      // Be careful after GVN. This should not happen since the `HMul` has only
      // one use.
      DCHECK_NE(binop_left, binop_right);
      if (binop_right == mul) {
        // Covers both `acc + mul` reversed by canonicalization and `acc - mul`.
        accumulator = binop_left;
      } else if (use->IsAdd()) {
        // `mul - acc` cannot be expressed as a multiply-subtract, so only Add
        // is accepted when the multiplication is the left input.
        DCHECK_EQ(binop_left, mul);
        accumulator = binop_right;
      }

      if (accumulator != nullptr) {
        HMultiplyAccumulate* mulacc =
            new (arena) HMultiplyAccumulate(type,
                                            binop->GetKind(),
                                            accumulator,
                                            mul->GetLeft(),
                                            mul->GetRight());

        binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
        DCHECK(!mul->HasUses());
        mul->GetBlock()->RemoveInstruction(mul);
        return true;
      }
    } else if (use->IsNeg() && isa != kArm) {
      // Merge -(x * y) into a multiply-subtract from zero.
      // NOTE(review): excluded on plain kArm — presumably the required
      // encoding is unavailable there; confirm against the code generator.
      HMultiplyAccumulate* mulacc =
          new (arena) HMultiplyAccumulate(type,
                                          HInstruction::kSub,
                                          mul->GetBlock()->GetGraph()->GetConstant(type, 0),
                                          mul->GetLeft(),
                                          mul->GetRight());

      use->GetBlock()->ReplaceAndRemoveInstructionWith(use, mulacc);
      DCHECK(!mul->HasUses());
      mul->GetBlock()->RemoveInstruction(mul);
      return true;
    }
  }

  // Use multiply accumulate instruction for a few simple patterns.
  // We prefer not applying the following transformations if the left and
  // right inputs perform the same operation.
  // We rely on GVN having squashed the inputs if appropriate. However the
  // results are still correct even if that did not happen.
  if (mul->GetLeft() == mul->GetRight()) {
    return false;
  }

  HInstruction* left = mul->GetLeft();
  HInstruction* right = mul->GetRight();
  if ((right->IsAdd() || right->IsSub()) &&
      TrySimpleMultiplyAccumulatePatterns(mul, right->AsBinaryOperation(), left)) {
    return true;
  }
  if ((left->IsAdd() || left->IsSub()) &&
      TrySimpleMultiplyAccumulatePatterns(mul, left->AsBinaryOperation(), right)) {
    return true;
  }
  return false;
}


// Try to merge a Not input of an And/Or/Xor into an HBitwiseNegatedRight
// (lowered to e.g. BIC/ORN/EON on ARM64). Returns true if the graph was
// rewritten. Only fires when exactly one input is a Not; with two Nots,
// De Morgan's laws should be applied instead (handled elsewhere).
bool TryMergeNegatedInput(HBinaryOperation* op) {
  DCHECK(op->IsAnd() || op->IsOr() || op->IsXor()) << op->DebugName();
  HInstruction* left = op->GetLeft();
  HInstruction* right = op->GetRight();

  // Only consider the case where there is exactly one Not, with 2 Not's De
  // Morgan's laws should be applied instead.
  if (left->IsNot() ^ right->IsNot()) {
    HInstruction* hnot = (left->IsNot() ? left : right);
    HInstruction* hother = (left->IsNot() ? right : left);

    // Only do the simplification if the Not has only one use and can thus be
    // safely removed. Even though ARM64 negated bitwise operations do not have
    // an immediate variant (only register), we still do the simplification when
    // `hother` is a constant, because it removes an instruction if the constant
    // cannot be encoded as an immediate:
    //   mov r0, #large_constant
    //   neg r2, r1
    //   and r0, r0, r2
    // becomes:
    //   mov r0, #large_constant
    //   bic r0, r0, r1
    if (hnot->HasOnlyOneNonEnvironmentUse()) {
      // Replace code looking like
      //    NOT tmp, mask
      //    AND dst, src, tmp   (respectively ORR, EOR)
      // with
      //    BIC dst, src, mask  (respectively ORN, EON)
      HInstruction* src = hnot->AsNot()->GetInput();

      HBitwiseNegatedRight* neg_op = new (hnot->GetBlock()->GetGraph()->GetArena())
          HBitwiseNegatedRight(op->GetType(), op->GetKind(), hother, src, op->GetDexPc());

      op->GetBlock()->ReplaceAndRemoveInstructionWith(op, neg_op);
      hnot->GetBlock()->RemoveInstruction(hnot);
      return true;
    }
  }

  return false;
}


// Try to split an array access into an explicit base-address computation
// (array + data_offset) via HIntermediateAddress, so the memory instruction
// only has to scale/add the index. `access` is the HArrayGet/HArraySet,
// `data_offset` the offset of the array's data from the array reference.
// Returns true if the address computation was extracted.
bool TryExtractArrayAccessAddress(HInstruction* access,
                                  HInstruction* array,
                                  HInstruction* index,
                                  size_t data_offset) {
  if (index->IsConstant() ||
      (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
    // When the index is a constant all the addressing can be fitted in the
    // memory access instruction, so do not split the access.
    return false;
  }
  if (access->IsArraySet() &&
      access->AsArraySet()->GetValue()->GetType() == Primitive::kPrimNot) {
    // The access may require a runtime call or the original array pointer.
    return false;
  }
  if (kEmitCompilerReadBarrier &&
      access->IsArrayGet() &&
      access->GetType() == Primitive::kPrimNot) {
    // For object arrays, the read barrier instrumentation requires
    // the original array pointer.
    // TODO: This can be relaxed for Baker CC.
    return false;
  }

  // Proceed to extract the base address computation.
  HGraph* graph = access->GetBlock()->GetGraph();
  ArenaAllocator* arena = graph->GetArena();

  HIntConstant* offset = graph->GetIntConstant(data_offset);
  HIntermediateAddress* address = new (arena) HIntermediateAddress(array, offset, kNoDexPc);
  // TODO: Is it ok to not have this on the intermediate address?
  // address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
  access->GetBlock()->InsertInstructionBefore(address, access);
  // Input 0 of the access is the array reference; redirect it to the
  // precomputed intermediate address.
  access->ReplaceInput(address, 0);
  // Both instructions must depend on GC to prevent any instruction that can
  // trigger GC to be inserted between the two.
  access->AddSideEffects(SideEffects::DependsOnGC());
  DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC()));
  DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC()));
  // TODO: Code generation for HArrayGet and HArraySet will check whether the input address
  // is an HIntermediateAddress and generate appropriate code.
  // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe
  // `HArm64Load` and `HArm64Store`,`HArmLoad` and `HArmStore`). We defer these changes
  // because these new instructions would not bring any advantages yet.
  // Also see the comments in
  // `InstructionCodeGeneratorARMVIXL::VisitArrayGet()`
  // `InstructionCodeGeneratorARMVIXL::VisitArraySet()`
  // `InstructionCodeGeneratorARM64::VisitArrayGet()`
  // `InstructionCodeGeneratorARM64::VisitArraySet()`.
  return true;
}

// Vector (SIMD) counterpart of TryCombineMultiplyAccumulate: merge an HVecMul
// with its single VecAdd/VecSub user into an HVecMultiplyAccumulate.
// Only enabled for ARM64 with byte/char/short/int packed types.
// Returns true if the graph was rewritten.
bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa) {
  Primitive::Type type = mul->GetPackedType();
  switch (isa) {
    case kArm64:
      if (!(type == Primitive::kPrimByte ||
            type == Primitive::kPrimChar ||
            type == Primitive::kPrimShort ||
            type == Primitive::kPrimInt)) {
        return false;
      }
      break;
    default:
      return false;
  }

  ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();

  if (mul->HasOnlyOneNonEnvironmentUse()) {
    HInstruction* use = mul->GetUses().front().GetUser();
    if (use->IsVecAdd() || use->IsVecSub()) {
      // Replace code looking like
      //    VECMUL tmp, x, y
      //    VECADD/SUB dst, acc, tmp
      // with
      //    VECMULACC dst, acc, x, y
      // Note that we do not want to (unconditionally) perform the merge when the
      // multiplication has multiple uses and it can be merged in all of them.
      // Multiple uses could happen on the same control-flow path, and we would
      // then increase the amount of work. In the future we could try to evaluate
      // whether all uses are on different control-flow paths (using dominance and
      // reverse-dominance information) and only perform the merge when they are.
      HInstruction* accumulator = nullptr;
      HVecBinaryOperation* binop = use->AsVecBinaryOperation();
      HInstruction* binop_left = binop->GetLeft();
      HInstruction* binop_right = binop->GetRight();
      // This is always true since the `HVecMul` has only one use (which is checked above).
      DCHECK_NE(binop_left, binop_right);
      if (binop_right == mul) {
        accumulator = binop_left;
      } else if (use->IsVecAdd()) {
        // `mul - acc` cannot be expressed as a multiply-subtract, so only
        // VecAdd is accepted when the multiplication is the left input.
        DCHECK_EQ(binop_left, mul);
        accumulator = binop_right;
      }

      HInstruction::InstructionKind kind =
          use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub;
      if (accumulator != nullptr) {
        HVecMultiplyAccumulate* mulacc =
            new (arena) HVecMultiplyAccumulate(arena,
                                               kind,
                                               accumulator,
                                               mul->GetLeft(),
                                               mul->GetRight(),
                                               binop->GetPackedType(),
                                               binop->GetVectorLength());

        binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
        DCHECK(!mul->HasUses());
        mul->GetBlock()->RemoveInstruction(mul);
        return true;
      }
    }
  }

  return false;
}

// Try to hoist the `(index << component_shift) + data_offset` part of a
// vector memory access into a shared HIntermediateAddressIndex. Only done
// when at least one other access (or an existing HIntermediateAddressIndex)
// uses the same index with identical offset and shift, so the computation
// can actually be shared. Returns true if the address was extracted.
bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index) {
  if (index->IsConstant()) {
    // If index is constant the whole address calculation often can be done by LDR/STR themselves.
    // TODO: Treat the case with not-embedable constant.
    return false;
  }

  HGraph* graph = access->GetBlock()->GetGraph();
  ArenaAllocator* arena = graph->GetArena();
  Primitive::Type packed_type = access->GetPackedType();
  uint32_t data_offset = mirror::Array::DataOffset(
      Primitive::ComponentSize(packed_type)).Uint32Value();
  size_t component_shift = Primitive::ComponentSizeShift(packed_type);

  bool is_extracting_beneficial = false;
  // It is beneficial to extract index intermediate address only if there are at least 2 users.
  for (const HUseListNode<HInstruction*>& use : index->GetUses()) {
    HInstruction* user = use.GetUser();
    if (user->IsVecMemoryOperation() && user != access) {
      // Another vector access on the same index: sharing pays off only if it
      // would compute the exact same offset and shift.
      HVecMemoryOperation* another_access = user->AsVecMemoryOperation();
      Primitive::Type another_packed_type = another_access->GetPackedType();
      uint32_t another_data_offset = mirror::Array::DataOffset(
          Primitive::ComponentSize(another_packed_type)).Uint32Value();
      size_t another_component_shift = Primitive::ComponentSizeShift(another_packed_type);
      if (another_data_offset == data_offset && another_component_shift == component_shift) {
        is_extracting_beneficial = true;
        break;
      }
    } else if (user->IsIntermediateAddressIndex()) {
      // An equivalent address computation already exists; extracting here
      // lets GVN fold this access into it.
      HIntermediateAddressIndex* another_access = user->AsIntermediateAddressIndex();
      uint32_t another_data_offset = another_access->GetOffset()->AsIntConstant()->GetValue();
      size_t another_component_shift = another_access->GetShift()->AsIntConstant()->GetValue();
      if (another_data_offset == data_offset && another_component_shift == component_shift) {
        is_extracting_beneficial = true;
        break;
      }
    }
  }

  if (!is_extracting_beneficial) {
    return false;
  }

  // Proceed to extract the index + data_offset address computation.
  HIntConstant* offset = graph->GetIntConstant(data_offset);
  HIntConstant* shift = graph->GetIntConstant(component_shift);
  HIntermediateAddressIndex* address =
      new (arena) HIntermediateAddressIndex(index, offset, shift, kNoDexPc);

  access->GetBlock()->InsertInstructionBefore(address, access);
  // Input 1 of the access is the index; redirect it to the combined
  // index/offset/shift computation.
  access->ReplaceInput(address, 1);

  return true;
}

}  // namespace art