Home | History | Annotate | Download | only in arm
      1 // Copyright (c) 1994-2006 Sun Microsystems Inc.
      2 // All Rights Reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions
      6 // are met:
      7 //
      8 // - Redistributions of source code must retain the above copyright notice,
      9 // this list of conditions and the following disclaimer.
     10 //
     11 // - Redistribution in binary form must reproduce the above copyright
     12 // notice, this list of conditions and the following disclaimer in the
     13 // documentation and/or other materials provided with the
     14 // distribution.
     15 //
     16 // - Neither the name of Sun Microsystems or the names of contributors may
     17 // be used to endorse or promote products derived from this software without
     18 // specific prior written permission.
     19 //
     20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     23 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
     24 // COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     25 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     26 // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     27 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     28 // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     29 // STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     30 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
     31 // OF THE POSSIBILITY OF SUCH DAMAGE.
     32 
     33 // The original source code covered by the above license above has been
     34 // modified significantly by Google Inc.
     35 // Copyright 2012 the V8 project authors. All rights reserved.
     36 
     37 #include "src/arm/assembler-arm.h"
     38 
     39 #if V8_TARGET_ARCH_ARM
     40 
     41 #include "src/arm/assembler-arm-inl.h"
     42 #include "src/assembler-inl.h"
     43 #include "src/base/bits.h"
     44 #include "src/base/cpu.h"
     45 #include "src/code-stubs.h"
     46 #include "src/deoptimizer.h"
     47 #include "src/macro-assembler.h"
     48 #include "src/objects-inl.h"
     49 
     50 namespace v8 {
     51 namespace internal {
     52 
// Cumulative CPU feature bit sets, one per supported architecture level.
// Each level is a superset of the previous one (armv6 < armv7 <
// armv7+sudiv < armv8), so a plain bitwise-AND against a command-line cap
// selects the best common configuration.
static const unsigned kArmv6 = 0u;
static const unsigned kArmv7 = kArmv6 | (1u << ARMv7);
static const unsigned kArmv7WithSudiv = kArmv7 | (1u << ARMv7_SUDIV);
static const unsigned kArmv8 = kArmv7WithSudiv | (1u << ARMv8);
     57 
// Translate the --arm-arch flag (and, for now, the deprecated --enable-*
// flags) into one of the kArmv6/kArmv7/kArmv7WithSudiv/kArmv8 feature sets.
// Terminates the process via FATAL on an unrecognised --arm-arch value.
static unsigned CpuFeaturesFromCommandLine() {
  unsigned result;
  if (strcmp(FLAG_arm_arch, "armv8") == 0) {
    result = kArmv8;
  } else if (strcmp(FLAG_arm_arch, "armv7+sudiv") == 0) {
    result = kArmv7WithSudiv;
  } else if (strcmp(FLAG_arm_arch, "armv7") == 0) {
    result = kArmv7;
  } else if (strcmp(FLAG_arm_arch, "armv6") == 0) {
    result = kArmv6;
  } else {
    fprintf(stderr, "Error: unrecognised value for --arm-arch ('%s').\n",
            FLAG_arm_arch);
    fprintf(stderr,
            "Supported values are:  armv8\n"
            "                       armv7+sudiv\n"
            "                       armv7\n"
            "                       armv6\n");
    FATAL("arm-arch");
  }

  // If any of the old (deprecated) flags are specified, print a warning, but
  // otherwise try to respect them for now.
  // TODO(jbramley): When all the old bots have been updated, remove this.
  if (FLAG_enable_armv7.has_value || FLAG_enable_vfp3.has_value ||
      FLAG_enable_32dregs.has_value || FLAG_enable_neon.has_value ||
      FLAG_enable_sudiv.has_value || FLAG_enable_armv8.has_value) {
    // As an approximation of the old behaviour, set the default values from the
    // arm_arch setting, then apply the flags over the top.
    bool enable_armv7 = (result & (1u << ARMv7)) != 0;
    bool enable_vfp3 = (result & (1u << ARMv7)) != 0;
    bool enable_32dregs = (result & (1u << ARMv7)) != 0;
    bool enable_neon = (result & (1u << ARMv7)) != 0;
    bool enable_sudiv = (result & (1u << ARMv7_SUDIV)) != 0;
    bool enable_armv8 = (result & (1u << ARMv8)) != 0;
    if (FLAG_enable_armv7.has_value) {
      fprintf(stderr,
              "Warning: --enable_armv7 is deprecated. "
              "Use --arm_arch instead.\n");
      enable_armv7 = FLAG_enable_armv7.value;
    }
    if (FLAG_enable_vfp3.has_value) {
      fprintf(stderr,
              "Warning: --enable_vfp3 is deprecated. "
              "Use --arm_arch instead.\n");
      enable_vfp3 = FLAG_enable_vfp3.value;
    }
    if (FLAG_enable_32dregs.has_value) {
      fprintf(stderr,
              "Warning: --enable_32dregs is deprecated. "
              "Use --arm_arch instead.\n");
      enable_32dregs = FLAG_enable_32dregs.value;
    }
    if (FLAG_enable_neon.has_value) {
      fprintf(stderr,
              "Warning: --enable_neon is deprecated. "
              "Use --arm_arch instead.\n");
      enable_neon = FLAG_enable_neon.value;
    }
    if (FLAG_enable_sudiv.has_value) {
      fprintf(stderr,
              "Warning: --enable_sudiv is deprecated. "
              "Use --arm_arch instead.\n");
      enable_sudiv = FLAG_enable_sudiv.value;
    }
    if (FLAG_enable_armv8.has_value) {
      fprintf(stderr,
              "Warning: --enable_armv8 is deprecated. "
              "Use --arm_arch instead.\n");
      enable_armv8 = FLAG_enable_armv8.value;
    }
    // Emulate the old implications.
    if (enable_armv8) {
      enable_vfp3 = true;
      enable_neon = true;
      enable_32dregs = true;
      enable_sudiv = true;
    }
    // Select the best available configuration. Anything short of a full
    // ARMv7 feature set (VFPv3-D32 + NEON) collapses to ARMv6.
    if (enable_armv7 && enable_vfp3 && enable_32dregs && enable_neon) {
      if (enable_sudiv) {
        if (enable_armv8) {
          result = kArmv8;
        } else {
          result = kArmv7WithSudiv;
        }
      } else {
        result = kArmv7;
      }
    } else {
      result = kArmv6;
    }
  }
  return result;
}
    153 
    154 // Get the CPU features enabled by the build.
    155 // For cross compilation the preprocessor symbols such as
    156 // CAN_USE_ARMV7_INSTRUCTIONS and CAN_USE_VFP3_INSTRUCTIONS can be used to
    157 // enable ARMv7 and VFPv3 instructions when building the snapshot. However,
    158 // these flags should be consistent with a supported ARM configuration:
    159 //  "armv6":       ARMv6 + VFPv2
    160 //  "armv7":       ARMv7 + VFPv3-D32 + NEON
    161 //  "armv7+sudiv": ARMv7 + VFPv4-D32 + NEON + SUDIV
    162 //  "armv8":       ARMv8 (+ all of the above)
// Compute the feature set implied by the build-time CAN_USE_* macros.
// Returns one of kArmv6/kArmv7/kArmv7WithSudiv/kArmv8; inconsistent macro
// combinations are rejected at compile time with #error.
static constexpr unsigned CpuFeaturesFromCompiler() {
// TODO(jbramley): Once the build flags are simplified, these tests should
// also be simplified.

// Check *architectural* implications.
#if defined(CAN_USE_ARMV8_INSTRUCTIONS) && !defined(CAN_USE_ARMV7_INSTRUCTIONS)
#error "CAN_USE_ARMV8_INSTRUCTIONS should imply CAN_USE_ARMV7_INSTRUCTIONS"
#endif
#if defined(CAN_USE_ARMV8_INSTRUCTIONS) && !defined(CAN_USE_SUDIV)
#error "CAN_USE_ARMV8_INSTRUCTIONS should imply CAN_USE_SUDIV"
#endif
#if defined(CAN_USE_ARMV7_INSTRUCTIONS) != defined(CAN_USE_VFP3_INSTRUCTIONS)
// V8 requires VFP, and all ARMv7 devices with VFP have VFPv3. Similarly,
// VFPv3 isn't available before ARMv7.
#error "CAN_USE_ARMV7_INSTRUCTIONS should match CAN_USE_VFP3_INSTRUCTIONS"
#endif
#if defined(CAN_USE_NEON) && !defined(CAN_USE_ARMV7_INSTRUCTIONS)
#error "CAN_USE_NEON should imply CAN_USE_ARMV7_INSTRUCTIONS"
#endif

// Find compiler-implied features. The branches are ordered from the most
// capable configuration down; the first one fully satisfied wins.
#if defined(CAN_USE_ARMV8_INSTRUCTIONS) &&                           \
    defined(CAN_USE_ARMV7_INSTRUCTIONS) && defined(CAN_USE_SUDIV) && \
    defined(CAN_USE_NEON) && defined(CAN_USE_VFP3_INSTRUCTIONS)
  return kArmv8;
#elif defined(CAN_USE_ARMV7_INSTRUCTIONS) && defined(CAN_USE_SUDIV) && \
    defined(CAN_USE_NEON) && defined(CAN_USE_VFP3_INSTRUCTIONS)
  return kArmv7WithSudiv;
#elif defined(CAN_USE_ARMV7_INSTRUCTIONS) && defined(CAN_USE_NEON) && \
    defined(CAN_USE_VFP3_INSTRUCTIONS)
  return kArmv7;
#else
  return kArmv6;
#endif
}
    198 
    199 
// Determine the supported CPU feature set. For cross compilation only
// build-time (compiler) features, capped by the command line, are used.
// Otherwise: on hardware (__arm__) the feature set is probed at runtime,
// on the simulator the command line decides directly.
void CpuFeatures::ProbeImpl(bool cross_compile) {
  dcache_line_size_ = 64;

  unsigned command_line = CpuFeaturesFromCommandLine();
  // Only use statically determined features for cross compile (snapshot).
  if (cross_compile) {
    supported_ |= command_line & CpuFeaturesFromCompiler();
    return;
  }

#ifndef __arm__
  // For the simulator build, use whatever the flags specify.
  supported_ |= command_line;

#else  // __arm__
  // Probe for additional features at runtime.
  base::CPU cpu;
  // Runtime detection is slightly fuzzy, and some inferences are necessary.
  unsigned runtime = kArmv6;
  // NEON and VFPv3 imply at least ARMv7-A.
  if (cpu.has_neon() && cpu.has_vfp3_d32()) {
    DCHECK(cpu.has_vfp3());
    runtime |= kArmv7;
    if (cpu.has_idiva()) {
      runtime |= kArmv7WithSudiv;
      if (cpu.architecture() >= 8) {
        runtime |= kArmv8;
      }
    }
  }

  // Use the best of the features found by CPU detection and those inferred from
  // the build system. In both cases, restrict available features using the
  // command-line. Note that the command-line flags are very permissive (kArmv8)
  // by default.
  supported_ |= command_line & CpuFeaturesFromCompiler();
  supported_ |= command_line & runtime;

  // Additional tuning options.

  // ARM Cortex-A9 and Cortex-A5 have 32 byte cachelines.
  if (cpu.implementer() == base::CPU::ARM &&
      (cpu.part() == base::CPU::ARM_CORTEX_A5 ||
       cpu.part() == base::CPU::ARM_CORTEX_A9)) {
    dcache_line_size_ = 32;
  }
#endif

  // The feature bits form a chain of implications (see kArmv* above);
  // verify it still holds after all the masking.
  DCHECK_IMPLIES(IsSupported(ARMv7_SUDIV), IsSupported(ARMv7));
  DCHECK_IMPLIES(IsSupported(ARMv8), IsSupported(ARMv7_SUDIV));
}
    251 
    252 
    253 void CpuFeatures::PrintTarget() {
    254   const char* arm_arch = nullptr;
    255   const char* arm_target_type = "";
    256   const char* arm_no_probe = "";
    257   const char* arm_fpu = "";
    258   const char* arm_thumb = "";
    259   const char* arm_float_abi = nullptr;
    260 
    261 #if !defined __arm__
    262   arm_target_type = " simulator";
    263 #endif
    264 
    265 #if defined ARM_TEST_NO_FEATURE_PROBE
    266   arm_no_probe = " noprobe";
    267 #endif
    268 
    269 #if defined CAN_USE_ARMV8_INSTRUCTIONS
    270   arm_arch = "arm v8";
    271 #elif defined CAN_USE_ARMV7_INSTRUCTIONS
    272   arm_arch = "arm v7";
    273 #else
    274   arm_arch = "arm v6";
    275 #endif
    276 
    277 #if defined CAN_USE_NEON
    278   arm_fpu = " neon";
    279 #elif defined CAN_USE_VFP3_INSTRUCTIONS
    280 #  if defined CAN_USE_VFP32DREGS
    281   arm_fpu = " vfp3";
    282 #  else
    283   arm_fpu = " vfp3-d16";
    284 #  endif
    285 #else
    286   arm_fpu = " vfp2";
    287 #endif
    288 
    289 #ifdef __arm__
    290   arm_float_abi = base::OS::ArmUsingHardFloat() ? "hard" : "softfp";
    291 #elif USE_EABI_HARDFLOAT
    292   arm_float_abi = "hard";
    293 #else
    294   arm_float_abi = "softfp";
    295 #endif
    296 
    297 #if defined __arm__ && (defined __thumb__) || (defined __thumb2__)
    298   arm_thumb = " thumb";
    299 #endif
    300 
    301   printf("target%s%s %s%s%s %s\n",
    302          arm_target_type, arm_no_probe, arm_arch, arm_fpu, arm_thumb,
    303          arm_float_abi);
    304 }
    305 
    306 
    307 void CpuFeatures::PrintFeatures() {
    308   printf("ARMv8=%d ARMv7=%d VFPv3=%d VFP32DREGS=%d NEON=%d SUDIV=%d",
    309          CpuFeatures::IsSupported(ARMv8), CpuFeatures::IsSupported(ARMv7),
    310          CpuFeatures::IsSupported(VFPv3), CpuFeatures::IsSupported(VFP32DREGS),
    311          CpuFeatures::IsSupported(NEON), CpuFeatures::IsSupported(SUDIV));
    312 #ifdef __arm__
    313   bool eabi_hardfloat = base::OS::ArmUsingHardFloat();
    314 #elif USE_EABI_HARDFLOAT
    315   bool eabi_hardfloat = true;
    316 #else
    317   bool eabi_hardfloat = false;
    318 #endif
    319   printf(" USE_EABI_HARDFLOAT=%d\n", eabi_hardfloat);
    320 }
    321 
    322 
    323 // -----------------------------------------------------------------------------
    324 // Implementation of RelocInfo
    325 
// static
// Relocation modes that need fixing up when code is moved. On ARM only
// pc-relative code targets are position-dependent here.
const int RelocInfo::kApplyMask =
    RelocInfo::ModeMask(RelocInfo::RELATIVE_CODE_TARGET);
    329 
// Whether the target of this reloc entry is encoded inline in the
// instruction stream rather than loaded from the constant pool.
bool RelocInfo::IsCodedSpecially() {
  // The deserializer needs to know whether a pointer is specially coded. Being
  // specially coded on ARM means that it is a movw/movt instruction. We don't
  // generate those for relocatable pointers.
  return false;
}
    336 
// True if the instruction at pc_ loads its target from the constant pool.
bool RelocInfo::IsInConstantPool() {
  return Assembler::is_constant_pool_load(pc_);
}
    340 
// Map this runtime-entry reloc's target address back to a deoptimizer id
// of the given kind. Only valid for RUNTIME_ENTRY reloc modes.
int RelocInfo::GetDeoptimizationId(Isolate* isolate, DeoptimizeKind kind) {
  DCHECK(IsRuntimeEntry(rmode_));
  return Deoptimizer::GetDeoptimizationId(isolate, target_address(), kind);
}
    345 
// Patch the call target of a JS_TO_WASM_CALL site, optionally flushing the
// instruction cache (controlled by icache_flush_mode).
void RelocInfo::set_js_to_wasm_address(Address address,
                                       ICacheFlushMode icache_flush_mode) {
  DCHECK_EQ(rmode_, JS_TO_WASM_CALL);
  Assembler::set_target_address_at(pc_, constant_pool_, address,
                                   icache_flush_mode);
}
    352 
// Read the current call target of a JS_TO_WASM_CALL site.
Address RelocInfo::js_to_wasm_address() const {
  DCHECK_EQ(rmode_, JS_TO_WASM_CALL);
  return Assembler::target_address_at(pc_, constant_pool_);
}
    357 
    358 uint32_t RelocInfo::wasm_call_tag() const {
    359   DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL);
    360   return static_cast<uint32_t>(
    361       Assembler::target_address_at(pc_, constant_pool_));
    362 }
    363 
    364 // -----------------------------------------------------------------------------
    365 // Implementation of Operand and MemOperand
    366 // See assembler-arm-inl.h for inlined constructors
    367 
// Immediate operand holding a heap object handle; the handle's address is
// stored as the immediate and tagged EMBEDDED_OBJECT so it gets relocated.
Operand::Operand(Handle<HeapObject> handle) {
  rm_ = no_reg;
  value_.immediate = static_cast<intptr_t>(handle.address());
  rmode_ = RelocInfo::EMBEDDED_OBJECT;
}
    373 
    374 
    375 Operand::Operand(Register rm, ShiftOp shift_op, int shift_imm) {
    376   DCHECK(is_uint5(shift_imm));
    377 
    378   rm_ = rm;
    379   rs_ = no_reg;
    380   shift_op_ = shift_op;
    381   shift_imm_ = shift_imm & 31;
    382 
    383   if ((shift_op == ROR) && (shift_imm == 0)) {
    384     // ROR #0 is functionally equivalent to LSL #0 and this allow us to encode
    385     // RRX as ROR #0 (See below).
    386     shift_op = LSL;
    387   } else if (shift_op == RRX) {
    388     // encoded as ROR with shift_imm == 0
    389     DCHECK_EQ(shift_imm, 0);
    390     shift_op_ = ROR;
    391     shift_imm_ = 0;
    392   }
    393 }
    394 
    395 
    396 Operand::Operand(Register rm, ShiftOp shift_op, Register rs) {
    397   DCHECK(shift_op != RRX);
    398   rm_ = rm;
    399   rs_ = no_reg;
    400   shift_op_ = shift_op;
    401   rs_ = rs;
    402 }
    403 
// Build an operand for a numeric literal. Smi-representable values become a
// plain Smi immediate; anything else becomes a deferred heap-number request
// resolved later by AllocateAndInstallRequestedHeapObjects.
Operand Operand::EmbeddedNumber(double value) {
  int32_t smi;
  if (DoubleToSmiInteger(value, &smi)) return Operand(Smi::FromInt(smi));
  Operand result(0, RelocInfo::EMBEDDED_OBJECT);
  result.is_heap_object_request_ = true;
  result.value_.heap_object_request = HeapObjectRequest(value);
  return result;
}
    412 
// Build an operand for a code stub whose Code object does not exist yet;
// the request is resolved later by AllocateAndInstallRequestedHeapObjects.
Operand Operand::EmbeddedCode(CodeStub* stub) {
  Operand result(0, RelocInfo::CODE_TARGET);
  result.is_heap_object_request_ = true;
  result.value_.heap_object_request = HeapObjectRequest(stub);
  return result;
}
    419 
// Base-plus-immediate memory operand: [rn, #offset] with the given
// addressing mode.
MemOperand::MemOperand(Register rn, int32_t offset, AddrMode am)
    : rn_(rn), rm_(no_reg), offset_(offset), am_(am) {
  // Accesses below the stack pointer are not safe, and are prohibited by the
  // ABI. We can check obvious violations here.
  if (rn == sp) {
    if (am == Offset) DCHECK_LE(0, offset);
    if (am == NegOffset) DCHECK_GE(0, offset);
  }
}
    429 
// Base-plus-register memory operand: [rn, rm] (no shift applied).
MemOperand::MemOperand(Register rn, Register rm, AddrMode am)
    : rn_(rn), rm_(rm), shift_op_(LSL), shift_imm_(0), am_(am) {}
    432 
// Base-plus-shifted-register memory operand: [rn, rm, <shift_op> #shift_imm].
// shift_imm must fit in 5 bits (checked below; the mask is defensive).
MemOperand::MemOperand(Register rn, Register rm, ShiftOp shift_op,
                       int shift_imm, AddrMode am)
    : rn_(rn),
      rm_(rm),
      shift_op_(shift_op),
      shift_imm_(shift_imm & 31),
      am_(am) {
  DCHECK(is_uint5(shift_imm));
}
    442 
// NEON memory operand without an explicit index register. The Rm field is
// set to pc for plain Offset and sp for PostIndex — presumably the NEON
// load/store encoding convention for "no writeback" vs "writeback"; confirm
// against the ARM NEON element/structure load encoding tables.
NeonMemOperand::NeonMemOperand(Register rn, AddrMode am, int align)
    : rn_(rn), rm_(am == Offset ? pc : sp) {
  DCHECK((am == Offset) || (am == PostIndex));
  SetAlignment(align);
}
    448 
// NEON memory operand with a register post-increment (Rm).
NeonMemOperand::NeonMemOperand(Register rn, Register rm, int align)
    : rn_(rn), rm_(rm) {
  SetAlignment(align);
}
    453 
    454 void NeonMemOperand::SetAlignment(int align) {
    455   switch (align) {
    456     case 0:
    457       align_ = 0;
    458       break;
    459     case 64:
    460       align_ = 1;
    461       break;
    462     case 128:
    463       align_ = 2;
    464       break;
    465     case 256:
    466       align_ = 3;
    467       break;
    468     default:
    469       UNREACHABLE();
    470       break;
    471   }
    472 }
    473 
// Resolve all deferred heap-object requests (see Operand::EmbeddedNumber /
// Operand::EmbeddedCode): allocate each object now and write its address
// into the constant pool slot recorded for that request.
void Assembler::AllocateAndInstallRequestedHeapObjects(Isolate* isolate) {
  for (auto& request : heap_object_requests_) {
    Handle<HeapObject> object;
    switch (request.kind()) {
      case HeapObjectRequest::kHeapNumber:
        object =
            isolate->factory()->NewHeapNumber(request.heap_number(), TENURED);
        break;
      case HeapObjectRequest::kCodeStub:
        request.code_stub()->set_isolate(isolate);
        object = request.code_stub()->GetCode();
        break;
    }
    // request.offset() locates the load instruction in the code buffer;
    // constant_pool_entry_address maps it to its constant pool slot.
    Address pc = reinterpret_cast<Address>(buffer_) + request.offset();
    Memory<Address>(constant_pool_entry_address(pc, 0 /* unused */)) =
        object.address();
  }
}
    492 
    493 // -----------------------------------------------------------------------------
    494 // Specific instructions, constants, and masks.
    495 
// str(r, MemOperand(sp, 4, NegPreIndex), al) instruction (aka push(r))
// register r is not encoded.
const Instr kPushRegPattern = al | B26 | 4 | NegPreIndex | sp.code() * B16;
// ldr(r, MemOperand(sp, 4, PostIndex), al) instruction (aka pop(r))
// register r is not encoded.
const Instr kPopRegPattern = al | B26 | L | 4 | PostIndex | sp.code() * B16;
// ldr rd, [pc, #offset]
const Instr kLdrPCImmedMask = 15 * B24 | 7 * B20 | 15 * B16;
const Instr kLdrPCImmedPattern = 5 * B24 | L | pc.code() * B16;
// vldr dd, [pc, #offset]
const Instr kVldrDPCMask = 15 * B24 | 3 * B20 | 15 * B16 | 15 * B8;
const Instr kVldrDPCPattern = 13 * B24 | L | pc.code() * B16 | 11 * B8;
// blxcc rm
const Instr kBlxRegMask =
    15 * B24 | 15 * B20 | 15 * B16 | 15 * B12 | 15 * B8 | 15 * B4;
const Instr kBlxRegPattern =
    B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | BLX;
const Instr kBlxIp = al | kBlxRegPattern | ip.code();
// Masks, patterns and flip bits used when recognising and rewriting
// mov/mvn, movw/movt, cmp/cmn, add/sub and and/bic immediate forms
// (a "flip" constant toggles an instruction into its dual).
const Instr kMovMvnMask = 0x6D * B21 | 0xF * B16;
const Instr kMovMvnPattern = 0xD * B21;
const Instr kMovMvnFlip = B22;
const Instr kMovLeaveCCMask = 0xDFF * B16;
const Instr kMovLeaveCCPattern = 0x1A0 * B16;
const Instr kMovwPattern = 0x30 * B20;
const Instr kMovtPattern = 0x34 * B20;
const Instr kMovwLeaveCCFlip = 0x5 * B21;
const Instr kMovImmedMask = 0x7F * B21;
const Instr kMovImmedPattern = 0x1D * B21;
const Instr kOrrImmedMask = 0x7F * B21;
const Instr kOrrImmedPattern = 0x1C * B21;
const Instr kCmpCmnMask = 0xDD * B20 | 0xF * B12;
const Instr kCmpCmnPattern = 0x15 * B20;
const Instr kCmpCmnFlip = B21;
const Instr kAddSubFlip = 0x6 * B21;
const Instr kAndBicFlip = 0xE * B21;

// Patterns for fp-relative loads/stores; matched with kLdrStrInstrTypeMask,
// which keeps bits 31-16 (cond/opcode/addressing/Rn) and discards Rd and
// the offset.
const Instr kLdrRegFpOffsetPattern = al | B26 | L | Offset | fp.code() * B16;
const Instr kStrRegFpOffsetPattern = al | B26 | Offset | fp.code() * B16;
const Instr kLdrRegFpNegOffsetPattern =
    al | B26 | L | NegOffset | fp.code() * B16;
const Instr kStrRegFpNegOffsetPattern = al | B26 | NegOffset | fp.code() * B16;
const Instr kLdrStrInstrTypeMask = 0xFFFF0000;
    539 
// Construct an assembler over the given code buffer. Reserves space for
// pending constant pool entries, points the relocation writer at the end of
// the buffer (it grows downwards), and picks the scratch registers.
Assembler::Assembler(const AssemblerOptions& options, void* buffer,
                     int buffer_size)
    : AssemblerBase(options, buffer, buffer_size),
      pending_32_bit_constants_(),
      pending_64_bit_constants_(),
      scratch_register_list_(ip.bit()) {
  pending_32_bit_constants_.reserve(kMinNumPendingConstants);
  pending_64_bit_constants_.reserve(kMinNumPendingConstants);
  // Relocation info is written backwards from the end of the buffer.
  reloc_info_writer.Reposition(buffer_ + buffer_size_, pc_);
  next_buffer_check_ = 0;
  const_pool_blocked_nesting_ = 0;
  no_const_pool_before_ = 0;
  first_const_pool_32_use_ = -1;
  first_const_pool_64_use_ = -1;
  last_bound_pos_ = 0;
  if (CpuFeatures::IsSupported(VFP32DREGS)) {
    // Register objects tend to be abstracted and survive between scopes, so
    // it's awkward to use CpuFeatures::VFP32DREGS with CpuFeatureScope. To make
    // its use consistent with other features, we always enable it if we can.
    EnableCpuFeature(VFP32DREGS);
    // Make sure we pick two D registers which alias a Q register. This way, we
    // can use a Q as a scratch if NEON is supported.
    scratch_vfp_register_list_ = d14.ToVfpRegList() | d15.ToVfpRegList();
  } else {
    // When VFP32DREGS is not supported, d15 becomes allocatable. Therefore we
    // cannot use it as a scratch.
    scratch_vfp_register_list_ = d14.ToVfpRegList();
  }
}
    569 
Assembler::~Assembler() {
  // Destroying the assembler while constant pool emission is still blocked
  // indicates an unbalanced BlockConstPoolScope somewhere.
  DCHECK_EQ(const_pool_blocked_nesting_, 0);
}
    573 
// Finalize code generation: flush pending constant pools, materialise any
// deferred heap objects, and fill |desc| with the buffer layout.
void Assembler::GetCode(Isolate* isolate, CodeDesc* desc) {
  // Emit constant pool if necessary.
  int constant_pool_offset = 0;
  CheckConstPool(true, false);
  DCHECK(pending_32_bit_constants_.empty());
  DCHECK(pending_64_bit_constants_.empty());

  AllocateAndInstallRequestedHeapObjects(isolate);

  // Set up code descriptor.
  desc->buffer = buffer_;
  desc->buffer_size = buffer_size_;
  desc->instr_size = pc_offset();
  // Relocation info was written backwards from the end of the buffer.
  desc->reloc_size = (buffer_ + buffer_size_) - reloc_info_writer.pos();
  // constant_pool_offset is never changed above, so constant_pool_size is
  // always 0 in this function as written.
  desc->constant_pool_size =
      (constant_pool_offset ? desc->instr_size - constant_pool_offset : 0);
  desc->origin = this;
  desc->unwinding_info_size = 0;
  desc->unwinding_info = nullptr;
}
    594 
    595 
    596 void Assembler::Align(int m) {
    597   DCHECK(m >= 4 && base::bits::IsPowerOfTwo(m));
    598   DCHECK_EQ(pc_offset() & (kInstrSize - 1), 0);
    599   while ((pc_offset() & (m - 1)) != 0) {
    600     nop();
    601   }
    602 }
    603 
    604 
void Assembler::CodeTargetAlign() {
  // Preferred alignment of jump targets on some ARM chips.
  Align(8);
}
    609 
    610 
// Extract the condition field (top 4 bits) of an instruction.
Condition Assembler::GetCondition(Instr instr) {
  return Instruction::ConditionField(instr);
}
    614 
    615 bool Assembler::IsLdrRegisterImmediate(Instr instr) {
    616   return (instr & (B27 | B26 | B25 | B22 | B20)) == (B26 | B20);
    617 }
    618 
    619 
    620 bool Assembler::IsVldrDRegisterImmediate(Instr instr) {
    621   return (instr & (15 * B24 | 3 * B20 | 15 * B8)) == (13 * B24 | B20 | 11 * B8);
    622 }
    623 
    624 
    625 int Assembler::GetLdrRegisterImmediateOffset(Instr instr) {
    626   DCHECK(IsLdrRegisterImmediate(instr));
    627   bool positive = (instr & B23) == B23;
    628   int offset = instr & kOff12Mask;  // Zero extended offset.
    629   return positive ? offset : -offset;
    630 }
    631 
    632 
    633 int Assembler::GetVldrDRegisterImmediateOffset(Instr instr) {
    634   DCHECK(IsVldrDRegisterImmediate(instr));
    635   bool positive = (instr & B23) == B23;
    636   int offset = instr & kOff8Mask;  // Zero extended offset.
    637   offset <<= 2;
    638   return positive ? offset : -offset;
    639 }
    640 
    641 
    642 Instr Assembler::SetLdrRegisterImmediateOffset(Instr instr, int offset) {
    643   DCHECK(IsLdrRegisterImmediate(instr));
    644   bool positive = offset >= 0;
    645   if (!positive) offset = -offset;
    646   DCHECK(is_uint12(offset));
    647   // Set bit indicating whether the offset should be added.
    648   instr = (instr & ~B23) | (positive ? B23 : 0);
    649   // Set the actual offset.
    650   return (instr & ~kOff12Mask) | offset;
    651 }
    652 
    653 
    654 Instr Assembler::SetVldrDRegisterImmediateOffset(Instr instr, int offset) {
    655   DCHECK(IsVldrDRegisterImmediate(instr));
    656   DCHECK((offset & ~3) == offset);  // Must be 64-bit aligned.
    657   bool positive = offset >= 0;
    658   if (!positive) offset = -offset;
    659   DCHECK(is_uint10(offset));
    660   // Set bit indicating whether the offset should be added.
    661   instr = (instr & ~B23) | (positive ? B23 : 0);
    662   // Set the actual offset. Its bottom 2 bits are zero.
    663   return (instr & ~kOff8Mask) | (offset >> 2);
    664 }
    665 
    666 
    667 bool Assembler::IsStrRegisterImmediate(Instr instr) {
    668   return (instr & (B27 | B26 | B25 | B22 | B20)) == B26;
    669 }
    670 
    671 
    672 Instr Assembler::SetStrRegisterImmediateOffset(Instr instr, int offset) {
    673   DCHECK(IsStrRegisterImmediate(instr));
    674   bool positive = offset >= 0;
    675   if (!positive) offset = -offset;
    676   DCHECK(is_uint12(offset));
    677   // Set bit indicating whether the offset should be added.
    678   instr = (instr & ~B23) | (positive ? B23 : 0);
    679   // Set the actual offset.
    680   return (instr & ~kOff12Mask) | offset;
    681 }
    682 
    683 
    684 bool Assembler::IsAddRegisterImmediate(Instr instr) {
    685   return (instr & (B27 | B26 | B25 | B24 | B23 | B22 | B21)) == (B25 | B23);
    686 }
    687 
    688 
    689 Instr Assembler::SetAddRegisterImmediateOffset(Instr instr, int offset) {
    690   DCHECK(IsAddRegisterImmediate(instr));
    691   DCHECK_GE(offset, 0);
    692   DCHECK(is_uint12(offset));
    693   // Set the offset.
    694   return (instr & ~kOff12Mask) | offset;
    695 }
    696 
    697 
    698 Register Assembler::GetRd(Instr instr) {
    699   return Register::from_code(Instruction::RdValue(instr));
    700 }
    701 
    702 
    703 Register Assembler::GetRn(Instr instr) {
    704   return Register::from_code(Instruction::RnValue(instr));
    705 }
    706 
    707 
    708 Register Assembler::GetRm(Instr instr) {
    709   return Register::from_code(Instruction::RmValue(instr));
    710 }
    711 
    712 
    713 bool Assembler::IsPush(Instr instr) {
    714   return ((instr & ~kRdMask) == kPushRegPattern);
    715 }
    716 
    717 
    718 bool Assembler::IsPop(Instr instr) {
    719   return ((instr & ~kRdMask) == kPopRegPattern);
    720 }
    721 
    722 
    723 bool Assembler::IsStrRegFpOffset(Instr instr) {
    724   return ((instr & kLdrStrInstrTypeMask) == kStrRegFpOffsetPattern);
    725 }
    726 
    727 
    728 bool Assembler::IsLdrRegFpOffset(Instr instr) {
    729   return ((instr & kLdrStrInstrTypeMask) == kLdrRegFpOffsetPattern);
    730 }
    731 
    732 
    733 bool Assembler::IsStrRegFpNegOffset(Instr instr) {
    734   return ((instr & kLdrStrInstrTypeMask) == kStrRegFpNegOffsetPattern);
    735 }
    736 
    737 
    738 bool Assembler::IsLdrRegFpNegOffset(Instr instr) {
    739   return ((instr & kLdrStrInstrTypeMask) == kLdrRegFpNegOffsetPattern);
    740 }
    741 
    742 
    743 bool Assembler::IsLdrPcImmediateOffset(Instr instr) {
    744   // Check the instruction is indeed a
    745   // ldr<cond> <Rd>, [pc +/- offset_12].
    746   return (instr & kLdrPCImmedMask) == kLdrPCImmedPattern;
    747 }
    748 
    749 
    750 bool Assembler::IsVldrDPcImmediateOffset(Instr instr) {
    751   // Check the instruction is indeed a
    752   // vldr<cond> <Dd>, [pc +/- offset_10].
    753   return (instr & kVldrDPCMask) == kVldrDPCPattern;
    754 }
    755 
    756 
    757 bool Assembler::IsBlxReg(Instr instr) {
    758   // Check the instruction is indeed a
    759   // blxcc <Rm>
    760   return (instr & kBlxRegMask) == kBlxRegPattern;
    761 }
    762 
    763 
    764 bool Assembler::IsBlxIp(Instr instr) {
    765   // Check the instruction is indeed a
    766   // blx ip
    767   return instr == kBlxIp;
    768 }
    769 
    770 
    771 bool Assembler::IsTstImmediate(Instr instr) {
    772   return (instr & (B27 | B26 | I | kOpCodeMask | S | kRdMask)) ==
    773       (I | TST | S);
    774 }
    775 
    776 
    777 bool Assembler::IsCmpRegister(Instr instr) {
    778   return (instr & (B27 | B26 | I | kOpCodeMask | S | kRdMask | B4)) ==
    779       (CMP | S);
    780 }
    781 
    782 
    783 bool Assembler::IsCmpImmediate(Instr instr) {
    784   return (instr & (B27 | B26 | I | kOpCodeMask | S | kRdMask)) ==
    785       (I | CMP | S);
    786 }
    787 
    788 
    789 Register Assembler::GetCmpImmediateRegister(Instr instr) {
    790   DCHECK(IsCmpImmediate(instr));
    791   return GetRn(instr);
    792 }
    793 
    794 
// Returns the raw 12-bit immediate field of a cmp-immediate instruction,
// without decoding the rotate/imm8 shifter encoding.
int Assembler::GetCmpImmediateRawImmediate(Instr instr) {
  DCHECK(IsCmpImmediate(instr));
  return instr & kOff12Mask;
}
    799 
    800 
    801 // Labels refer to positions in the (to be) generated code.
    802 // There are bound, linked, and unused labels.
    803 //
    804 // Bound labels refer to known positions in the already
    805 // generated code. pos() is the position the label refers to.
    806 //
    807 // Linked labels refer to unknown positions in the code
    808 // to be generated; pos() is the position of the last
    809 // instruction using the label.
    810 //
// The linked labels form a link chain by making the branch offset
// in the instruction stream point to the previous branch
// instruction using the same label.
    814 //
    815 // The link chain is terminated by a branch offset pointing to the
    816 // same position.
    817 
    818 
// Returns the position encoded in the instruction at |pos|: either the raw
// 24-bit link value of a label-chain entry (see mov_label_offset), or the
// destination position of a b/bl/blx branch computed from its offset field.
int Assembler::target_at(int pos) {
  Instr instr = instr_at(pos);
  if (is_uint24(instr)) {
    // Emitted link to a label, not part of a branch.
    return instr;
  }
  DCHECK_EQ(5 * B25, instr & 7 * B25);  // b, bl, or blx imm24
  // Shift the 24-bit field to the top then arithmetic-shift back down: this
  // sign-extends imm24 and multiplies by 4 in one step (imm26 = imm24 << 2).
  int imm26 = ((instr & kImm24Mask) << 8) >> 6;
  if ((Instruction::ConditionField(instr) == kSpecialCondition) &&
      ((instr & B24) != 0)) {
    // blx uses bit 24 to encode bit 2 of imm26
    imm26 += 2;
  }
  // Branch offsets are relative to pc, which reads ahead of the branch by
  // Instruction::kPcLoadDelta bytes.
  return pos + Instruction::kPcLoadDelta + imm26;
}
    834 
    835 
// Patches the code at |pos| to refer to |target_pos|. Two cases: the
// mov/movw-movt/mov-orr-orr sequence emitted by mov_label_offset (recognized
// by a raw 24-bit link word), or a b/bl/blx branch whose offset field is
// rewritten in place.
void Assembler::target_at_put(int pos, int target_pos) {
  Instr instr = instr_at(pos);
  if (is_uint24(instr)) {
    DCHECK(target_pos == pos || target_pos >= 0);
    // Emitted link to a label, not part of a branch.
    // Load the position of the label relative to the generated code object
    // pointer in a register.

    // The existing code must be a single 24-bit label chain link, followed by
    // nops encoding the destination register. See mov_label_offset.

    // Extract the destination register from the first nop instructions.
    Register dst =
        Register::from_code(Instruction::RmValue(instr_at(pos + kInstrSize)));
    // In addition to the 24-bit label chain link, we expect to find one nop for
    // ARMv7 and above, or two nops for ARMv6. See mov_label_offset.
    DCHECK(IsNop(instr_at(pos + kInstrSize), dst.code()));
    if (!CpuFeatures::IsSupported(ARMv7)) {
      DCHECK(IsNop(instr_at(pos + 2 * kInstrSize), dst.code()));
    }

    // Here are the instructions we need to emit:
    //   For ARMv7: target24 => target16_1:target16_0
    //      movw dst, #target16_0
    //      movt dst, #target16_1
    //   For ARMv6: target24 => target8_2:target8_1:target8_0
    //      mov dst, #target8_0
    //      orr dst, dst, #target8_1 << 8
    //      orr dst, dst, #target8_2 << 16

    uint32_t target24 = target_pos + (Code::kHeaderSize - kHeapObjectTag);
    DCHECK(is_uint24(target24));
    if (is_uint8(target24)) {
      // If the target fits in a byte then only patch with a mov
      // instruction.
      PatchingAssembler patcher(options(),
                                reinterpret_cast<byte*>(buffer_ + pos), 1);
      patcher.mov(dst, Operand(target24));
    } else {
      uint16_t target16_0 = target24 & kImm16Mask;
      uint16_t target16_1 = target24 >> 16;
      if (CpuFeatures::IsSupported(ARMv7)) {
        // Patch with movw/movt.
        if (target16_1 == 0) {
          PatchingAssembler patcher(options(),
                                    reinterpret_cast<byte*>(buffer_ + pos), 1);
          CpuFeatureScope scope(&patcher, ARMv7);
          patcher.movw(dst, target16_0);
        } else {
          PatchingAssembler patcher(options(),
                                    reinterpret_cast<byte*>(buffer_ + pos), 2);
          CpuFeatureScope scope(&patcher, ARMv7);
          patcher.movw(dst, target16_0);
          patcher.movt(dst, target16_1);
        }
      } else {
        // Patch with a sequence of mov/orr/orr instructions.
        uint8_t target8_0 = target16_0 & kImm8Mask;
        uint8_t target8_1 = target16_0 >> 8;
        uint8_t target8_2 = target16_1 & kImm8Mask;
        if (target8_2 == 0) {
          PatchingAssembler patcher(options(),
                                    reinterpret_cast<byte*>(buffer_ + pos), 2);
          patcher.mov(dst, Operand(target8_0));
          patcher.orr(dst, dst, Operand(target8_1 << 8));
        } else {
          PatchingAssembler patcher(options(),
                                    reinterpret_cast<byte*>(buffer_ + pos), 3);
          patcher.mov(dst, Operand(target8_0));
          patcher.orr(dst, dst, Operand(target8_1 << 8));
          patcher.orr(dst, dst, Operand(target8_2 << 16));
        }
      }
    }
    return;
  }
  // Otherwise this is a b/bl/blx branch: recompute and rewrite its 24-bit
  // offset field (and, for blx, bit 2 of the offset in B24).
  int imm26 = target_pos - (pos + Instruction::kPcLoadDelta);
  DCHECK_EQ(5 * B25, instr & 7 * B25);  // b, bl, or blx imm24
  if (Instruction::ConditionField(instr) == kSpecialCondition) {
    // blx uses bit 24 to encode bit 2 of imm26
    DCHECK_EQ(0, imm26 & 1);
    instr = (instr & ~(B24 | kImm24Mask)) | ((imm26 & 2) >> 1) * B24;
  } else {
    DCHECK_EQ(0, imm26 & 3);
    instr &= ~kImm24Mask;
  }
  int imm24 = imm26 >> 2;
  DCHECK(is_int24(imm24));
  instr_at_put(pos, instr | (imm24 & kImm24Mask));
}
    926 
// Prints the state of label |L| — unused, bound, linked (with each entry on
// its link chain), or inconsistent — to stdout. Debugging aid only.
void Assembler::print(const Label* L) {
  if (L->is_unused()) {
    PrintF("unused label\n");
  } else if (L->is_bound()) {
    PrintF("bound label to %d\n", L->pos());
  } else if (L->is_linked()) {
    // Walk the chain with a scratch label so |L| itself is left untouched.
    Label l;
    l.link_to(L->pos());
    PrintF("unbound label");
    while (l.is_linked()) {
      PrintF("@ %d ", l.pos());
      Instr instr = instr_at(l.pos());
      if ((instr & ~kImm24Mask) == 0) {
        // Raw label-chain link emitted by mov_label_offset.
        PrintF("value\n");
      } else {
        DCHECK_EQ(instr & 7 * B25, 5 * B25);  // b, bl, or blx
        Condition cond = Instruction::ConditionField(instr);
        const char* b;
        const char* c;
        if (cond == kSpecialCondition) {
          b = "blx";
          c = "";
        } else {
          if ((instr & B24) != 0)
            b = "bl";
          else
            b = "b";

          switch (cond) {
            case eq: c = "eq"; break;
            case ne: c = "ne"; break;
            case hs: c = "hs"; break;
            case lo: c = "lo"; break;
            case mi: c = "mi"; break;
            case pl: c = "pl"; break;
            case vs: c = "vs"; break;
            case vc: c = "vc"; break;
            case hi: c = "hi"; break;
            case ls: c = "ls"; break;
            case ge: c = "ge"; break;
            case lt: c = "lt"; break;
            case gt: c = "gt"; break;
            case le: c = "le"; break;
            case al: c = ""; break;
            default:
              c = "";
              UNREACHABLE();
          }
        }
        PrintF("%s%s\n", b, c);
      }
      next(&l);
    }
  } else {
    PrintF("label in inconsistent state (pos = %d)\n", L->pos_);
  }
}
    984 
    985 
// Binds label |L| to position |pos|: patches every instruction on the
// label's link chain to refer to |pos|, then marks the label as bound.
void Assembler::bind_to(Label* L, int pos) {
  DCHECK(0 <= pos && pos <= pc_offset());  // must have a valid binding position
  while (L->is_linked()) {
    int fixup_pos = L->pos();
    next(L);  // call next before overwriting link with target at fixup_pos
    target_at_put(fixup_pos, pos);
  }
  L->bind_to(pos);

  // Keep track of the last bound label so we don't eliminate any instructions
  // before a bound label.
  if (pos > last_bound_pos_)
    last_bound_pos_ = pos;
}
   1000 
   1001 
// Binds label |L| to the current pc offset.
void Assembler::bind(Label* L) {
  DCHECK(!L->is_bound());  // label can only be bound once
  bind_to(L, pc_offset());
}
   1006 
   1007 
   1008 void Assembler::next(Label* L) {
   1009   DCHECK(L->is_linked());
   1010   int link = target_at(L->pos());
   1011   if (link == L->pos()) {
   1012     // Branch target points to the same instruction. This is the end of the link
   1013     // chain.
   1014     L->Unuse();
   1015   } else {
   1016     DCHECK_GE(link, 0);
   1017     L->link_to(link);
   1018   }
   1019 }
   1020 
   1021 namespace {
   1022 
   1023 // Low-level code emission routines depending on the addressing mode.
   1024 // If this returns true then you have to use the rotate_imm and immed_8
   1025 // that it returns, because it may have already changed the instruction
   1026 // to match them!
bool FitsShifter(uint32_t imm32, uint32_t* rotate_imm, uint32_t* immed_8,
                 Instr* instr) {
  // imm32 must be unsigned.
  // Try each of the 16 even rotations: the immediate fits if some rotation
  // leaves a value representable in 8 bits.
  for (int rot = 0; rot < 16; rot++) {
    uint32_t imm8 = base::bits::RotateLeft32(imm32, 2 * rot);
    if ((imm8 <= 0xFF)) {
      *rotate_imm = rot;
      *immed_8 = imm8;
      return true;
    }
  }
  // If the opcode is one with a complementary version and the complementary
  // immediate fits, change the opcode.
  if (instr != nullptr) {
    if ((*instr & kMovMvnMask) == kMovMvnPattern) {
      // mov <-> mvn: try the bitwise complement of the immediate.
      if (FitsShifter(~imm32, rotate_imm, immed_8, nullptr)) {
        *instr ^= kMovMvnFlip;
        return true;
      } else if ((*instr & kMovLeaveCCMask) == kMovLeaveCCPattern) {
        // A flag-preserving mov of a 16-bit value can become movw on ARMv7.
        if (CpuFeatures::IsSupported(ARMv7)) {
          if (imm32 < 0x10000) {
            *instr ^= kMovwLeaveCCFlip;
            *instr |= Assembler::EncodeMovwImmediate(imm32);
            *rotate_imm = *immed_8 = 0;  // Not used for movw.
            return true;
          }
        }
      }
    } else if ((*instr & kCmpCmnMask) == kCmpCmnPattern) {
      // cmp <-> cmn: try the negated immediate.
      if (FitsShifter(-static_cast<int>(imm32), rotate_imm, immed_8, nullptr)) {
        *instr ^= kCmpCmnFlip;
        return true;
      }
    } else {
      Instr alu_insn = (*instr & kALUMask);
      if (alu_insn == ADD ||
          alu_insn == SUB) {
        // add <-> sub: try the negated immediate.
        if (FitsShifter(-static_cast<int>(imm32), rotate_imm, immed_8,
                        nullptr)) {
          *instr ^= kAddSubFlip;
          return true;
        }
      } else if (alu_insn == AND ||
                 alu_insn == BIC) {
        // and <-> bic: try the bitwise complement of the immediate.
        if (FitsShifter(~imm32, rotate_imm, immed_8, nullptr)) {
          *instr ^= kAndBicFlip;
          return true;
        }
      }
    }
  }
  return false;
}
   1080 
   1081 // We have to use the temporary register for things that can be relocated even
   1082 // if they can be encoded in the ARM's 12 bits of immediate-offset instruction
   1083 // space.  There is no guarantee that the relocated location can be similarly
   1084 // encoded.
   1085 bool MustOutputRelocInfo(RelocInfo::Mode rmode, const Assembler* assembler) {
   1086   if (RelocInfo::IsOnlyForSerializer(rmode)) {
   1087     if (assembler->predictable_code_size()) return true;
   1088     return assembler->options().record_reloc_info_for_serialization;
   1089   } else if (RelocInfo::IsNone(rmode)) {
   1090     return false;
   1091   }
   1092   return true;
   1093 }
   1094 
   1095 bool UseMovImmediateLoad(const Operand& x, const Assembler* assembler) {
   1096   DCHECK_NOT_NULL(assembler);
   1097   if (x.MustOutputRelocInfo(assembler)) {
   1098     // Prefer constant pool if data is likely to be patched.
   1099     return false;
   1100   } else {
   1101     // Otherwise, use immediate load if movw / movt is available.
   1102     return CpuFeatures::IsSupported(ARMv7);
   1103   }
   1104 }
   1105 
   1106 }  // namespace
   1107 
// Forwards to the file-local helper using this operand's relocation mode.
bool Operand::MustOutputRelocInfo(const Assembler* assembler) const {
  return v8::internal::MustOutputRelocInfo(rmode_, assembler);
}
   1111 
// Returns the number of instructions required to materialize this operand
// for data-processing instruction |instr|: 1 if it encodes directly as a
// shifter operand, otherwise the cost of an immediate/constant-pool load
// plus, when needed, the requested instruction itself.
int Operand::InstructionsRequired(const Assembler* assembler,
                                  Instr instr) const {
  DCHECK_NOT_NULL(assembler);
  if (rm_.is_valid()) return 1;
  uint32_t dummy1, dummy2;
  if (MustOutputRelocInfo(assembler) ||
      !FitsShifter(immediate(), &dummy1, &dummy2, &instr)) {
    // The immediate operand cannot be encoded as a shifter operand, or use of
    // constant pool is required.  First account for the instructions required
    // for the constant pool or immediate load
    int instructions;
    if (UseMovImmediateLoad(*this, assembler)) {
      DCHECK(CpuFeatures::IsSupported(ARMv7));
      // A movw / movt immediate load.
      instructions = 2;
    } else {
      // A small constant pool load.
      instructions = 1;
    }
    if ((instr & ~kCondMask) != 13 * B21) {  // mov, S not set
      // For a mov or mvn instruction which doesn't set the condition
      // code, the constant pool or immediate load is enough, otherwise we need
      // to account for the actual instruction being requested.
      instructions += 1;
    }
    return instructions;
  } else {
    // No use of constant pool and the immediate operand can be encoded as a
    // shifter operand.
    return 1;
  }
}
   1144 
// Loads the 32-bit immediate operand |x| into |rd| under condition |cond|,
// either with a movw/movt pair or via a pc-relative constant pool load.
void Assembler::Move32BitImmediate(Register rd, const Operand& x,
                                   Condition cond) {
  if (UseMovImmediateLoad(x, this)) {
    CpuFeatureScope scope(this, ARMv7);
    // UseMovImmediateLoad should return false when we need to output
    // relocation info, since we prefer the constant pool for values that
    // can be patched.
    DCHECK(!x.MustOutputRelocInfo(this));
    UseScratchRegisterScope temps(this);
    // Re-use the destination register as a scratch if possible.
    Register target = rd != pc ? rd : temps.Acquire();
    uint32_t imm32 = static_cast<uint32_t>(x.immediate());
    movw(target, imm32 & 0xFFFF, cond);
    movt(target, imm32 >> 16, cond);
    if (target.code() != rd.code()) {
      mov(rd, target, LeaveCC, cond);
    }
  } else {
    // Record the value in the constant pool and emit a pc-relative load.
    int32_t immediate;
    if (x.IsHeapObjectRequest()) {
      // Heap object requests are resolved later; emit a placeholder of 0.
      RequestHeapObject(x.heap_object_request());
      immediate = 0;
    } else {
      immediate = x.immediate();
    }
    ConstantPoolAddEntry(pc_offset(), x.rmode_, immediate);
    ldr_pcrel(rd, 0, cond);
  }
}
   1174 
// Emits a data-processing (addressing mode 1) instruction, falling back to a
// multi-instruction sequence when the operand cannot be encoded directly as
// a shifter operand.
void Assembler::AddrMode1(Instr instr, Register rd, Register rn,
                          const Operand& x) {
  CheckBuffer();
  uint32_t opcode = instr & kOpCodeMask;
  bool set_flags = (instr & S) != 0;
  DCHECK((opcode == ADC) || (opcode == ADD) || (opcode == AND) ||
         (opcode == BIC) || (opcode == EOR) || (opcode == ORR) ||
         (opcode == RSB) || (opcode == RSC) || (opcode == SBC) ||
         (opcode == SUB) || (opcode == CMN) || (opcode == CMP) ||
         (opcode == TEQ) || (opcode == TST) || (opcode == MOV) ||
         (opcode == MVN));
  // For comparison instructions, rd is not defined.
  DCHECK(rd.is_valid() || (opcode == CMN) || (opcode == CMP) ||
         (opcode == TEQ) || (opcode == TST));
  // For move instructions, rn is not defined.
  DCHECK(rn.is_valid() || (opcode == MOV) || (opcode == MVN));
  DCHECK(rd.is_valid() || rn.is_valid());
  DCHECK_EQ(instr & ~(kCondMask | kOpCodeMask | S), 0);
  if (!AddrMode1TryEncodeOperand(&instr, x)) {
    DCHECK(x.IsImmediate());
    // Upon failure to encode, the opcode should not have changed.
    DCHECK(opcode == (instr & kOpCodeMask));
    UseScratchRegisterScope temps(this);
    Condition cond = Instruction::ConditionField(instr);
    if ((opcode == MOV) && !set_flags) {
      // Generate a sequence of mov instructions or a load from the constant
      // pool only for a MOV instruction which does not set the flags.
      DCHECK(!rn.is_valid());
      Move32BitImmediate(rd, x, cond);
    } else if ((opcode == ADD) && !set_flags && (rd == rn) &&
               !temps.CanAcquire()) {
      // Split the operation into a sequence of additions if we cannot use a
      // scratch register. In this case, we cannot re-use rn and the assembler
      // does not have any scratch registers to spare.
      uint32_t imm = x.immediate();
      do {
        // The immediate encoding format is composed of 8 bits of data and 4
        // bits encoding a rotation. Each of the 16 possible rotations accounts
        // for a rotation by an even number.
        //   4 bits -> 16 rotations possible
        //          -> 16 rotations of 2 bits each fits in a 32-bit value.
        // This means that finding the even number of trailing zeroes of the
        // immediate allows us to more efficiently split it:
        int trailing_zeroes = base::bits::CountTrailingZeros(imm) & ~1u;
        uint32_t mask = (0xFF << trailing_zeroes);
        add(rd, rd, Operand(imm & mask), LeaveCC, cond);
        imm = imm & ~mask;
      } while (!ImmediateFitsAddrMode1Instruction(imm));
      add(rd, rd, Operand(imm), LeaveCC, cond);
    } else {
      // The immediate operand cannot be encoded as a shifter operand, so load
      // it first to a scratch register and change the original instruction to
      // use it.
      // Re-use the destination register if possible.
      Register scratch =
          (rd.is_valid() && rd != rn && rd != pc) ? rd : temps.Acquire();
      mov(scratch, x, LeaveCC, cond);
      AddrMode1(instr, rd, rn, Operand(scratch));
    }
    return;
  }
  if (!rd.is_valid()) {
    // Emit a comparison instruction.
    emit(instr | rn.code() * B16);
  } else if (!rn.is_valid()) {
    // Emit a move instruction. If the operand is a register-shifted register,
    // then prevent the destination from being PC as this is unpredictable.
    DCHECK(!x.IsRegisterShiftedRegister() || rd != pc);
    emit(instr | rd.code() * B12);
  } else {
    emit(instr | rn.code() * B16 | rd.code() * B12);
  }
  if (rn == pc || x.rm_ == pc) {
    // Block constant pool emission for one instruction after reading pc.
    BlockConstPoolFor(1);
  }
}
   1252 
// Attempts to encode operand |x| directly into |*instr|. Returns false for
// immediates that require relocation or do not fit a shifter operand; the
// caller is then responsible for emitting a longer sequence.
bool Assembler::AddrMode1TryEncodeOperand(Instr* instr, const Operand& x) {
  if (x.IsImmediate()) {
    // Immediate.
    uint32_t rotate_imm;
    uint32_t immed_8;
    if (x.MustOutputRelocInfo(this) ||
        !FitsShifter(x.immediate(), &rotate_imm, &immed_8, instr)) {
      // Let the caller handle generating multiple instructions.
      return false;
    }
    *instr |= I | rotate_imm * B8 | immed_8;
  } else if (x.IsImmediateShiftedRegister()) {
    *instr |= x.shift_imm_ * B7 | x.shift_op_ | x.rm_.code();
  } else {
    DCHECK(x.IsRegisterShiftedRegister());
    // It is unpredictable to use the PC in this case.
    DCHECK(x.rm_ != pc && x.rs_ != pc);
    *instr |= x.rs_.code() * B8 | x.shift_op_ | B4 | x.rm_.code();
  }

  return true;
}
   1275 
// Emits a load/store word or byte (addressing mode 2) instruction, moving an
// out-of-range immediate offset to a scratch register first.
void Assembler::AddrMode2(Instr instr, Register rd, const MemOperand& x) {
  DCHECK((instr & ~(kCondMask | B | L)) == B26);
  // This method does not handle pc-relative addresses. ldr_pcrel() should be
  // used instead.
  DCHECK(x.rn_ != pc);
  int am = x.am_;
  if (!x.rm_.is_valid()) {
    // Immediate offset.
    int offset_12 = x.offset_;
    if (offset_12 < 0) {
      // Encode the magnitude and flip the U (up/down) bit.
      offset_12 = -offset_12;
      am ^= U;
    }
    if (!is_uint12(offset_12)) {
      // Immediate offset cannot be encoded, load it first to a scratch
      // register.
      UseScratchRegisterScope temps(this);
      // Allow re-using rd for load instructions if possible.
      bool is_load = (instr & L) == L;
      Register scratch =
          (is_load && rd != x.rn_ && rd != pc) ? rd : temps.Acquire();
      mov(scratch, Operand(x.offset_), LeaveCC,
          Instruction::ConditionField(instr));
      AddrMode2(instr, rd, MemOperand(x.rn_, scratch, x.am_));
      return;
    }
    DCHECK_GE(offset_12, 0);  // no masking needed
    instr |= offset_12;
  } else {
    // Register offset (shift_imm_ and shift_op_ are 0) or scaled
    // register offset; the constructors make sure that both shift_imm_
    // and shift_op_ are initialized.
    DCHECK(x.rm_ != pc);
    instr |= B25 | x.shift_imm_*B7 | x.shift_op_ | x.rm_.code();
  }
  DCHECK((am & (P | W)) == P || x.rn_ != pc);  // no pc base with writeback
  emit(instr | am | x.rn_.code()*B16 | rd.code()*B12);
}
   1314 
// Emits a miscellaneous load/store (addressing mode 3) instruction (e.g.
// halfword/signed/dual forms). Offsets that cannot be encoded, and scaled
// register offsets (unsupported by this mode), are computed into a scratch
// register first.
void Assembler::AddrMode3(Instr instr, Register rd, const MemOperand& x) {
  DCHECK((instr & ~(kCondMask | L | S6 | H)) == (B4 | B7));
  DCHECK(x.rn_.is_valid());
  // This method does not handle pc-relative addresses. ldr_pcrel() should be
  // used instead.
  DCHECK(x.rn_ != pc);
  int am = x.am_;
  bool is_load = (instr & L) == L;
  if (!x.rm_.is_valid()) {
    // Immediate offset.
    int offset_8 = x.offset_;
    if (offset_8 < 0) {
      // Encode the magnitude and flip the U (up/down) bit.
      offset_8 = -offset_8;
      am ^= U;
    }
    if (!is_uint8(offset_8)) {
      // Immediate offset cannot be encoded, load it first to a scratch
      // register.
      UseScratchRegisterScope temps(this);
      // Allow re-using rd for load instructions if possible.
      Register scratch =
          (is_load && rd != x.rn_ && rd != pc) ? rd : temps.Acquire();
      mov(scratch, Operand(x.offset_), LeaveCC,
          Instruction::ConditionField(instr));
      AddrMode3(instr, rd, MemOperand(x.rn_, scratch, x.am_));
      return;
    }
    DCHECK_GE(offset_8, 0);  // no masking needed
    // The 8-bit offset is split into two 4-bit halves (immedH:immedL).
    instr |= B | (offset_8 >> 4) * B8 | (offset_8 & 0xF);
  } else if (x.shift_imm_ != 0) {
    // Scaled register offsets are not supported, compute the offset separately
    // to a scratch register.
    UseScratchRegisterScope temps(this);
    // Allow re-using rd for load instructions if possible.
    Register scratch =
        (is_load && rd != x.rn_ && rd != pc) ? rd : temps.Acquire();
    mov(scratch, Operand(x.rm_, x.shift_op_, x.shift_imm_), LeaveCC,
        Instruction::ConditionField(instr));
    AddrMode3(instr, rd, MemOperand(x.rn_, scratch, x.am_));
    return;
  } else {
    // Register offset.
    DCHECK((am & (P | W)) == P || x.rm_ != pc);  // no pc index with writeback
    instr |= x.rm_.code();
  }
  DCHECK((am & (P | W)) == P || x.rn_ != pc);  // no pc base with writeback
  emit(instr | am | x.rn_.code()*B16 | rd.code()*B12);
}
   1363 
// Emits a block data transfer (addressing mode 4) instruction with base
// register |rn| and register list |rl|.
void Assembler::AddrMode4(Instr instr, Register rn, RegList rl) {
  DCHECK((instr & ~(kCondMask | P | U | W | L)) == B27);
  DCHECK_NE(rl, 0);  // the register list must not be empty
  DCHECK(rn != pc);
  emit(instr | rn.code()*B16 | rl);
}
   1370 
// Emits a coprocessor load/store (addressing mode 5) instruction with a
// word-aligned immediate offset encoded as an 8-bit word count.
void Assembler::AddrMode5(Instr instr, CRegister crd, const MemOperand& x) {
  // Unindexed addressing is not encoded by this function.
  DCHECK_EQ((B27 | B26),
            (instr & ~(kCondMask | kCoprocessorMask | P | U | N | W | L)));
  DCHECK(x.rn_.is_valid() && !x.rm_.is_valid());
  int am = x.am_;
  int offset_8 = x.offset_;
  DCHECK_EQ(offset_8 & 3, 0);  // offset must be an aligned word offset
  offset_8 >>= 2;
  if (offset_8 < 0) {
    // Encode the magnitude and flip the U (up/down) bit.
    offset_8 = -offset_8;
    am ^= U;
  }
  DCHECK(is_uint8(offset_8));  // unsigned word offset must fit in a byte
  DCHECK((am & (P | W)) == P || x.rn_ != pc);  // no pc base with writeback

  // Post-indexed addressing requires W == 1; different than in AddrMode2/3.
  if ((am & P) == 0)
    am |= W;

  DCHECK_GE(offset_8, 0);  // no masking needed
  emit(instr | am | x.rn_.code()*B16 | crd.code()*B12 | offset_8);
}
   1394 
   1395 
// Computes the pc-relative byte offset to label |L| for a branch emitted at
// the current pc, linking the label into its chain if it is not yet bound.
int Assembler::branch_offset(Label* L) {
  int target_pos;
  if (L->is_bound()) {
    target_pos = L->pos();
  } else {
    if (L->is_linked()) {
      // Point to previous instruction that uses the link.
      target_pos = L->pos();
    } else {
      // First entry of the link chain points to itself.
      target_pos = pc_offset();
    }
    L->link_to(pc_offset());
  }

  // Block the emission of the constant pool, since the branch instruction must
  // be emitted at the pc offset recorded by the label.
  if (!is_const_pool_blocked()) BlockConstPoolFor(1);

  return target_pos - (pc_offset() + Instruction::kPcLoadDelta);
}
   1417 
   1418 
   1419 // Branch instructions.
// Branch instructions.

// Emits b<cond> with the given byte offset. The offset must be word-aligned
// and representable as a signed 24-bit word count.
void Assembler::b(int branch_offset, Condition cond, RelocInfo::Mode rmode) {
  RecordRelocInfo(rmode);
  DCHECK_EQ(branch_offset & 3, 0);
  int imm24 = branch_offset >> 2;
  const bool b_imm_check = is_int24(imm24);
  CHECK(b_imm_check);
  emit(cond | B27 | B25 | (imm24 & kImm24Mask));

  if (cond == al) {
    // Dead code is a good location to emit the constant pool.
    CheckConstPool(false, false);
  }
}
   1433 
// Emits bl<cond> (branch with link) with the given word-aligned byte offset.
void Assembler::bl(int branch_offset, Condition cond, RelocInfo::Mode rmode) {
  RecordRelocInfo(rmode);
  DCHECK_EQ(branch_offset & 3, 0);
  int imm24 = branch_offset >> 2;
  const bool bl_imm_check = is_int24(imm24);
  CHECK(bl_imm_check);
  emit(cond | B27 | B25 | B24 | (imm24 & kImm24Mask));
}
   1442 
// Emits blx with an immediate offset (halfword-aligned). Bit 1 of the offset
// is carried in the H bit (B24); the remainder is the 24-bit word count.
void Assembler::blx(int branch_offset) {
  DCHECK_EQ(branch_offset & 1, 0);
  int h = ((branch_offset & 2) >> 1)*B24;
  int imm24 = branch_offset >> 2;
  const bool blx_imm_check = is_int24(imm24);
  CHECK(blx_imm_check);
  emit(kSpecialCondition | B27 | B25 | h | (imm24 & kImm24Mask));
}
   1451 
// Emits blx<cond> <Rm>: branch with link to the address in |target|.
void Assembler::blx(Register target, Condition cond) {
  DCHECK(target != pc);
  emit(cond | B24 | B21 | 15*B16 | 15*B12 | 15*B8 | BLX | target.code());
}
   1456 
// Emits bx<cond> <Rm>: branch to the address in |target|.
void Assembler::bx(Register target, Condition cond) {
  DCHECK(target != pc);  // use of pc is actually allowed, but discouraged
  emit(cond | B24 | B21 | 15*B16 | 15*B12 | 15*B8 | BX | target.code());
}
   1461 
   1462 
// Emits b<cond> to label |L|.
void Assembler::b(Label* L, Condition cond) {
  CheckBuffer();
  b(branch_offset(L), cond);
}
   1467 
   1468 
// Emits bl<cond> to label |L|.
void Assembler::bl(Label* L, Condition cond) {
  CheckBuffer();
  bl(branch_offset(L), cond);
}
   1473 
   1474 
// Emits blx to label |L|.
void Assembler::blx(Label* L) {
  CheckBuffer();
  blx(branch_offset(L));
}
   1479 
   1480 
   1481 // Data-processing instructions.
   1482 
// Data-processing instructions.

// Bitwise AND: dst = src1 & src2.
void Assembler::and_(Register dst, Register src1, const Operand& src2,
                     SBit s, Condition cond) {
  AddrMode1(cond | AND | s, dst, src1, src2);
}
   1487 
// Convenience overload wrapping |src2| in an Operand.
void Assembler::and_(Register dst, Register src1, Register src2, SBit s,
                     Condition cond) {
  and_(dst, src1, Operand(src2), s, cond);
}
   1492 
// Exclusive OR: dst = src1 ^ src2.
void Assembler::eor(Register dst, Register src1, const Operand& src2,
                    SBit s, Condition cond) {
  AddrMode1(cond | EOR | s, dst, src1, src2);
}
   1497 
   1498 
// Subtract: dst = src1 - src2.
void Assembler::sub(Register dst, Register src1, const Operand& src2,
                    SBit s, Condition cond) {
  AddrMode1(cond | SUB | s, dst, src1, src2);
}
   1503 
// Convenience overload wrapping |src2| in an Operand.
void Assembler::sub(Register dst, Register src1, Register src2, SBit s,
                    Condition cond) {
  sub(dst, src1, Operand(src2), s, cond);
}
   1508 
// Reverse subtract: dst = src2 - src1.
void Assembler::rsb(Register dst, Register src1, const Operand& src2,
                    SBit s, Condition cond) {
  AddrMode1(cond | RSB | s, dst, src1, src2);
}
   1513 
   1514 
// Add: dst = src1 + src2.
void Assembler::add(Register dst, Register src1, const Operand& src2,
                    SBit s, Condition cond) {
  AddrMode1(cond | ADD | s, dst, src1, src2);
}
   1519 
// Convenience overload wrapping |src2| in an Operand.
void Assembler::add(Register dst, Register src1, Register src2, SBit s,
                    Condition cond) {
  add(dst, src1, Operand(src2), s, cond);
}
   1524 
// Add with carry: dst = src1 + src2 + C.
void Assembler::adc(Register dst, Register src1, const Operand& src2,
                    SBit s, Condition cond) {
  AddrMode1(cond | ADC | s, dst, src1, src2);
}
   1529 
   1530 
// Subtract with carry: dst = src1 - src2 - !C.
void Assembler::sbc(Register dst, Register src1, const Operand& src2,
                    SBit s, Condition cond) {
  AddrMode1(cond | SBC | s, dst, src1, src2);
}
   1535 
   1536 
// Reverse subtract with carry: dst = src2 - src1 - !C.
void Assembler::rsc(Register dst, Register src1, const Operand& src2,
                    SBit s, Condition cond) {
  AddrMode1(cond | RSC | s, dst, src1, src2);
}
   1541 
   1542 
// Test: sets flags on src1 & src2 without writing a result register.
void Assembler::tst(Register src1, const Operand& src2, Condition cond) {
  AddrMode1(cond | TST | S, no_reg, src1, src2);
}
   1546 
// Convenience overload wrapping |src2| in an Operand.
void Assembler::tst(Register src1, Register src2, Condition cond) {
  tst(src1, Operand(src2), cond);
}
   1550 
   1551 void Assembler::teq(Register src1, const Operand& src2, Condition cond) {
   1552   AddrMode1(cond | TEQ | S, no_reg, src1, src2);
   1553 }
   1554 
   1555 
   1556 void Assembler::cmp(Register src1, const Operand& src2, Condition cond) {
   1557   AddrMode1(cond | CMP | S, no_reg, src1, src2);
   1558 }
   1559 
   1560 void Assembler::cmp(Register src1, Register src2, Condition cond) {
   1561   cmp(src1, Operand(src2), cond);
   1562 }
   1563 
   1564 void Assembler::cmp_raw_immediate(
   1565     Register src, int raw_immediate, Condition cond) {
   1566   DCHECK(is_uint12(raw_immediate));
   1567   emit(cond | I | CMP | S | src.code() << 16 | raw_immediate);
   1568 }
   1569 
   1570 
   1571 void Assembler::cmn(Register src1, const Operand& src2, Condition cond) {
   1572   AddrMode1(cond | CMN | S, no_reg, src1, src2);
   1573 }
   1574 
   1575 
// orr: dst = src1 OR src2.
void Assembler::orr(Register dst, Register src1, const Operand& src2,
                    SBit s, Condition cond) {
  AddrMode1(cond | ORR | s, dst, src1, src2);
}

// Register-operand convenience overload of orr.
void Assembler::orr(Register dst, Register src1, Register src2, SBit s,
                    Condition cond) {
  orr(dst, src1, Operand(src2), s, cond);
}

// mov: dst = src. MOV has no first source register, hence no_reg in the
// Rn slot.
void Assembler::mov(Register dst, const Operand& src, SBit s, Condition cond) {
  // Don't allow nop instructions in the form mov rn, rn to be generated using
  // the mov instruction. They must be generated using nop(int/NopMarkerTypes).
  DCHECK(!(src.IsRegister() && src.rm() == dst && s == LeaveCC && cond == al));
  AddrMode1(cond | MOV | s, dst, no_reg, src);
}

void Assembler::mov(Register dst, Register src, SBit s, Condition cond) {
  mov(dst, Operand(src), s, cond);
}
   1596 
// Loads 'dst' with the offset of 'label' from the start of the code object
// (label->pos() + Code::kHeaderSize - kHeapObjectTag). If the label is not
// yet bound, a patchable placeholder sequence is emitted instead and the
// label's link chain is extended.
void Assembler::mov_label_offset(Register dst, Label* label) {
  if (label->is_bound()) {
    mov(dst, Operand(label->pos() + (Code::kHeaderSize - kHeapObjectTag)));
  } else {
    // Emit the link to the label in the code stream followed by extra nop
    // instructions.
    // If the label is not linked, then start a new link chain by linking it to
    // itself, emitting pc_offset().
    int link = label->is_linked() ? label->pos() : pc_offset();
    label->link_to(pc_offset());

    // When the label is bound, these instructions will be patched with a
    // sequence of movw/movt or mov/orr/orr instructions. They will load the
    // destination register with the position of the label from the beginning
    // of the code.
    //
    // The link will be extracted from the first instruction and the destination
    // register from the second.
    //   For ARMv7:
    //      link
    //      mov dst, dst
    //   For ARMv6:
    //      link
    //      mov dst, dst
    //      mov dst, dst
    //
    // When the label gets bound: target_at extracts the link and target_at_put
    // patches the instructions.
    CHECK(is_uint24(link));
    // The placeholder words must stay contiguous, so no constant pool may be
    // emitted in the middle of them.
    BlockConstPoolScope block_const_pool(this);
    emit(link);
    nop(dst.code());
    if (!CpuFeatures::IsSupported(ARMv7)) {
      nop(dst.code());
    }
  }
}
   1634 
   1635 
// movw (ARMv7): writes the 16-bit 'immediate' into the low halfword of 'reg',
// clearing the top halfword (see the ARM ARM MOVW entry). Opcode bits 27-20
// are 0x30.
void Assembler::movw(Register reg, uint32_t immediate, Condition cond) {
  DCHECK(IsEnabled(ARMv7));
  emit(cond | 0x30*B20 | reg.code()*B12 | EncodeMovwImmediate(immediate));
}


// movt (ARMv7): writes the 16-bit 'immediate' into the top halfword of 'reg',
// leaving the low halfword unchanged. Opcode bits 27-20 are 0x34.
void Assembler::movt(Register reg, uint32_t immediate, Condition cond) {
  DCHECK(IsEnabled(ARMv7));
  emit(cond | 0x34*B20 | reg.code()*B12 | EncodeMovwImmediate(immediate));
}
   1646 
   1647 
// bic (bit clear): dst = src1 AND NOT(src2).
void Assembler::bic(Register dst, Register src1, const Operand& src2,
                    SBit s, Condition cond) {
  AddrMode1(cond | BIC | s, dst, src1, src2);
}


// mvn (move not): dst = NOT(src). Like MOV, there is no first source
// register, hence no_reg in the Rn slot.
void Assembler::mvn(Register dst, const Operand& src, SBit s, Condition cond) {
  AddrMode1(cond | MVN | s, dst, no_reg, src);
}
   1657 
   1658 void Assembler::asr(Register dst, Register src1, const Operand& src2, SBit s,
   1659                     Condition cond) {
   1660   if (src2.IsRegister()) {
   1661     mov(dst, Operand(src1, ASR, src2.rm()), s, cond);
   1662   } else {
   1663     mov(dst, Operand(src1, ASR, src2.immediate()), s, cond);
   1664   }
   1665 }
   1666 
   1667 void Assembler::lsl(Register dst, Register src1, const Operand& src2, SBit s,
   1668                     Condition cond) {
   1669   if (src2.IsRegister()) {
   1670     mov(dst, Operand(src1, LSL, src2.rm()), s, cond);
   1671   } else {
   1672     mov(dst, Operand(src1, LSL, src2.immediate()), s, cond);
   1673   }
   1674 }
   1675 
   1676 void Assembler::lsr(Register dst, Register src1, const Operand& src2, SBit s,
   1677                     Condition cond) {
   1678   if (src2.IsRegister()) {
   1679     mov(dst, Operand(src1, LSR, src2.rm()), s, cond);
   1680   } else {
   1681     mov(dst, Operand(src1, LSR, src2.immediate()), s, cond);
   1682   }
   1683 }
   1684 
// Multiply instructions.

// mla (multiply accumulate): dst = src1 * src2 + srcA. The A bit selects the
// accumulate form of the multiply encoding.
void Assembler::mla(Register dst, Register src1, Register src2, Register srcA,
                    SBit s, Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc && srcA != pc);
  emit(cond | A | s | dst.code()*B16 | srcA.code()*B12 |
       src2.code()*B8 | B7 | B4 | src1.code());
}


// mls (multiply and subtract, ARMv7): dst = srcA - src1 * src2.
void Assembler::mls(Register dst, Register src1, Register src2, Register srcA,
                    Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc && srcA != pc);
  DCHECK(IsEnabled(ARMv7));
  emit(cond | B22 | B21 | dst.code()*B16 | srcA.code()*B12 |
       src2.code()*B8 | B7 | B4 | src1.code());
}


// sdiv (signed divide): dst = src1 / src2. Requires hardware divide support
// (SUDIV). The Rt field (bits 15-12) must be all ones (0xF).
void Assembler::sdiv(Register dst, Register src1, Register src2,
                     Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc);
  DCHECK(IsEnabled(SUDIV));
  emit(cond | B26 | B25 | B24 | B20 | dst.code() * B16 | 0xF * B12 |
       src2.code() * B8 | B4 | src1.code());
}


// udiv (unsigned divide): dst = src1 / src2. Requires hardware divide
// support (SUDIV); differs from sdiv only by the B21 opcode bit.
void Assembler::udiv(Register dst, Register src1, Register src2,
                     Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc);
  DCHECK(IsEnabled(SUDIV));
  emit(cond | B26 | B25 | B24 | B21 | B20 | dst.code() * B16 | 0xF * B12 |
       src2.code() * B8 | B4 | src1.code());
}


// mul: dst = src1 * src2 (low 32 bits of the product).
void Assembler::mul(Register dst, Register src1, Register src2, SBit s,
                    Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc);
  // dst goes in bits 16-19 for this instruction!
  emit(cond | s | dst.code() * B16 | src2.code() * B8 | B7 | B4 | src1.code());
}
   1727 
   1728 
// smmla (signed most-significant-word multiply accumulate):
// dst = srcA + high 32 bits of the signed 64-bit product src1 * src2
// (see the ARM ARM SMMLA entry).
void Assembler::smmla(Register dst, Register src1, Register src2, Register srcA,
                      Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc && srcA != pc);
  emit(cond | B26 | B25 | B24 | B22 | B20 | dst.code() * B16 |
       srcA.code() * B12 | src2.code() * B8 | B4 | src1.code());
}


// smmul (signed most-significant-word multiply): dst = high 32 bits of the
// signed 64-bit product src1 * src2. Encoded as smmla with Ra = 0xF.
void Assembler::smmul(Register dst, Register src1, Register src2,
                      Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc);
  emit(cond | B26 | B25 | B24 | B22 | B20 | dst.code() * B16 | 0xF * B12 |
       src2.code() * B8 | B4 | src1.code());
}
   1743 
   1744 
// Long (64-bit result) multiply instructions. The result lives in the
// register pair dstH:dstL; the architecture requires RdHi != RdLo, hence
// the dstL != dstH checks.

// smlal (signed multiply accumulate long): dstH:dstL += src1 * src2 (signed).
void Assembler::smlal(Register dstL,
                      Register dstH,
                      Register src1,
                      Register src2,
                      SBit s,
                      Condition cond) {
  DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc);
  DCHECK(dstL != dstH);
  emit(cond | B23 | B22 | A | s | dstH.code()*B16 | dstL.code()*B12 |
       src2.code()*B8 | B7 | B4 | src1.code());
}


// smull (signed multiply long): dstH:dstL = src1 * src2 (signed).
void Assembler::smull(Register dstL,
                      Register dstH,
                      Register src1,
                      Register src2,
                      SBit s,
                      Condition cond) {
  DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc);
  DCHECK(dstL != dstH);
  emit(cond | B23 | B22 | s | dstH.code()*B16 | dstL.code()*B12 |
       src2.code()*B8 | B7 | B4 | src1.code());
}


// umlal (unsigned multiply accumulate long): dstH:dstL += src1 * src2
// (unsigned).
void Assembler::umlal(Register dstL,
                      Register dstH,
                      Register src1,
                      Register src2,
                      SBit s,
                      Condition cond) {
  DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc);
  DCHECK(dstL != dstH);
  emit(cond | B23 | A | s | dstH.code()*B16 | dstL.code()*B12 |
       src2.code()*B8 | B7 | B4 | src1.code());
}


// umull (unsigned multiply long): dstH:dstL = src1 * src2 (unsigned).
void Assembler::umull(Register dstL,
                      Register dstH,
                      Register src1,
                      Register src2,
                      SBit s,
                      Condition cond) {
  DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc);
  DCHECK(dstL != dstH);
  emit(cond | B23 | s | dstH.code()*B16 | dstL.code()*B12 |
       src2.code()*B8 | B7 | B4 | src1.code());
}
   1795 
   1796 
   1797 // Miscellaneous arithmetic instructions.
// clz: dst = number of leading zero bits in src.
void Assembler::clz(Register dst, Register src, Condition cond) {
  DCHECK(dst != pc && src != pc);
  emit(cond | B24 | B22 | B21 | 15*B16 | dst.code()*B12 |
       15*B8 | CLZ | src.code());
}
   1803 
   1804 
   1805 // Saturating instructions.
   1806 
   1807 // Unsigned saturate.
   1808 void Assembler::usat(Register dst,
   1809                      int satpos,
   1810                      const Operand& src,
   1811                      Condition cond) {
   1812   DCHECK(dst != pc && src.rm_ != pc);
   1813   DCHECK((satpos >= 0) && (satpos <= 31));
   1814   DCHECK(src.IsImmediateShiftedRegister());
   1815   DCHECK((src.shift_op_ == ASR) || (src.shift_op_ == LSL));
   1816 
   1817   int sh = 0;
   1818   if (src.shift_op_ == ASR) {
   1819       sh = 1;
   1820   }
   1821 
   1822   emit(cond | 0x6 * B24 | 0xE * B20 | satpos * B16 | dst.code() * B12 |
   1823        src.shift_imm_ * B7 | sh * B6 | 0x1 * B4 | src.rm_.code());
   1824 }
   1825 
   1826 
   1827 // Bitfield manipulation instructions.
   1828 
   1829 // Unsigned bit field extract.
   1830 // Extracts #width adjacent bits from position #lsb in a register, and
   1831 // writes them to the low bits of a destination register.
   1832 //   ubfx dst, src, #lsb, #width
// ubfx (ARMv7): unsigned bit field extract — copies src bits
// [lsb, lsb+width-1] into the low bits of dst, zero-extending the rest.
void Assembler::ubfx(Register dst,
                     Register src,
                     int lsb,
                     int width,
                     Condition cond) {
  DCHECK(IsEnabled(ARMv7));
  DCHECK(dst != pc && src != pc);
  DCHECK((lsb >= 0) && (lsb <= 31));
  DCHECK((width >= 1) && (width <= (32 - lsb)));
  // Encoding: (width - 1) occupies bits 20-16 and lsb occupies bits 11-7.
  emit(cond | 0xF * B23 | B22 | B21 | (width - 1) * B16 | dst.code() * B12 |
       lsb * B7 | B6 | B4 | src.code());
}
   1845 
   1846 
   1847 // Signed bit field extract.
   1848 // Extracts #width adjacent bits from position #lsb in a register, and
   1849 // writes them to the low bits of a destination register. The extracted
   1850 // value is sign extended to fill the destination register.
   1851 //   sbfx dst, src, #lsb, #width
// sbfx (ARMv7): signed bit field extract — copies src bits
// [lsb, lsb+width-1] into the low bits of dst, sign-extending the rest.
// Differs from ubfx only by the B22 opcode bit.
void Assembler::sbfx(Register dst,
                     Register src,
                     int lsb,
                     int width,
                     Condition cond) {
  DCHECK(IsEnabled(ARMv7));
  DCHECK(dst != pc && src != pc);
  DCHECK((lsb >= 0) && (lsb <= 31));
  DCHECK((width >= 1) && (width <= (32 - lsb)));
  // Encoding: (width - 1) occupies bits 20-16 and lsb occupies bits 11-7.
  emit(cond | 0xF * B23 | B21 | (width - 1) * B16 | dst.code() * B12 |
       lsb * B7 | B6 | B4 | src.code());
}
   1864 
   1865 
   1866 // Bit field clear.
   1867 // Sets #width adjacent bits at position #lsb in the destination register
   1868 // to zero, preserving the value of the other bits.
   1869 //   bfc dst, #lsb, #width
// bfc (ARMv7): bit field clear — zeroes dst bits [lsb, lsb+width-1],
// preserving the others. Encoded as BFI with an all-ones Rm field (the
// trailing 0xF), which is how the architecture distinguishes BFC from BFI.
void Assembler::bfc(Register dst, int lsb, int width, Condition cond) {
  DCHECK(IsEnabled(ARMv7));
  DCHECK(dst != pc);
  DCHECK((lsb >= 0) && (lsb <= 31));
  DCHECK((width >= 1) && (width <= (32 - lsb)));
  // The encoding carries msb (bits 20-16) and lsb (bits 11-7), not width.
  int msb = lsb + width - 1;
  emit(cond | 0x1F * B22 | msb * B16 | dst.code() * B12 | lsb * B7 | B4 | 0xF);
}
   1878 
   1879 
   1880 // Bit field insert.
   1881 // Inserts #width adjacent bits from the low bits of the source register
   1882 // into position #lsb of the destination register.
   1883 //   bfi dst, src, #lsb, #width
// bfi (ARMv7): bit field insert — copies the low 'width' bits of src into
// dst at position lsb, preserving the other dst bits.
void Assembler::bfi(Register dst,
                    Register src,
                    int lsb,
                    int width,
                    Condition cond) {
  DCHECK(IsEnabled(ARMv7));
  DCHECK(dst != pc && src != pc);
  DCHECK((lsb >= 0) && (lsb <= 31));
  DCHECK((width >= 1) && (width <= (32 - lsb)));
  // The encoding carries msb (bits 20-16) and lsb (bits 11-7), not width.
  int msb = lsb + width - 1;
  emit(cond | 0x1F * B22 | msb * B16 | dst.code() * B12 | lsb * B7 | B4 |
       src.code());
}
   1897 
   1898 
// pkhbt (pack halfword bottom-top): dst[15:0] = src1[15:0],
// dst[31:16] = (src2 LSL #imm)[31:16]. See ARM DDI 0406C.b, A8.8.125.
void Assembler::pkhbt(Register dst,
                      Register src1,
                      const Operand& src2,
                      Condition cond ) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.125.
  // cond(31-28) | 01101000(27-20) | Rn(19-16) |
  // Rd(15-12) | imm5(11-7) | 0(6) | 01(5-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2.IsImmediateShiftedRegister());
  DCHECK(src2.rm() != pc);
  DCHECK((src2.shift_imm_ >= 0) && (src2.shift_imm_ <= 31));
  DCHECK(src2.shift_op() == LSL);
  emit(cond | 0x68*B20 | src1.code()*B16 | dst.code()*B12 |
       src2.shift_imm_*B7 | B4 | src2.rm().code());
}
   1915 
   1916 
   1917 void Assembler::pkhtb(Register dst,
   1918                       Register src1,
   1919                       const Operand& src2,
   1920                       Condition cond) {
   1921   // Instruction details available in ARM DDI 0406C.b, A8.8.125.
   1922   // cond(31-28) | 01101000(27-20) | Rn(19-16) |
   1923   // Rd(15-12) | imm5(11-7) | 1(6) | 01(5-4) | Rm(3-0)
   1924   DCHECK(dst != pc);
   1925   DCHECK(src1 != pc);
   1926   DCHECK(src2.IsImmediateShiftedRegister());
   1927   DCHECK(src2.rm() != pc);
   1928   DCHECK((src2.shift_imm_ >= 1) && (src2.shift_imm_ <= 32));
   1929   DCHECK(src2.shift_op() == ASR);
   1930   int asr = (src2.shift_imm_ == 32) ? 0 : src2.shift_imm_;
   1931   emit(cond | 0x68*B20 | src1.code()*B16 | dst.code()*B12 |
   1932        asr*B7 | B6 | B4 | src2.rm().code());
   1933 }
   1934 
   1935 
// Sign- and zero-extension instructions. In all of these, 'rotate' must be
// 0, 8, 16 or 24; the expression ((rotate >> 1) & 0xC) * B8 places
// rotate / 8 into the two-bit rotate field at bits 11-10.

// sxtb: sign-extend the low byte of (src ROR rotate) into dst.
void Assembler::sxtb(Register dst, Register src, int rotate, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.233.
  // cond(31-28) | 01101010(27-20) | 1111(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6A * B20 | 0xF * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
}


// sxtab: dst = src1 + sign-extended low byte of (src2 ROR rotate).
void Assembler::sxtab(Register dst, Register src1, Register src2, int rotate,
                      Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.233.
  // cond(31-28) | 01101010(27-20) | Rn(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6A * B20 | src1.code() * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code());
}


// sxth: sign-extend the low halfword of (src ROR rotate) into dst.
void Assembler::sxth(Register dst, Register src, int rotate, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.235.
  // cond(31-28) | 01101011(27-20) | 1111(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6B * B20 | 0xF * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
}


// sxtah: dst = src1 + sign-extended low halfword of (src2 ROR rotate).
void Assembler::sxtah(Register dst, Register src1, Register src2, int rotate,
                      Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.235.
  // cond(31-28) | 01101011(27-20) | Rn(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6B * B20 | src1.code() * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code());
}


// uxtb: zero-extend the low byte of (src ROR rotate) into dst.
void Assembler::uxtb(Register dst, Register src, int rotate, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.274.
  // cond(31-28) | 01101110(27-20) | 1111(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6E * B20 | 0xF * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
}


// uxtab: dst = src1 + zero-extended low byte of (src2 ROR rotate).
void Assembler::uxtab(Register dst, Register src1, Register src2, int rotate,
                      Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.271.
  // cond(31-28) | 01101110(27-20) | Rn(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6E * B20 | src1.code() * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code());
}


// uxtb16: zero-extend two bytes (bits 7:0 and 23:16) of (src ROR rotate)
// into the two halfwords of dst.
void Assembler::uxtb16(Register dst, Register src, int rotate, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.275.
  // cond(31-28) | 01101100(27-20) | 1111(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6C * B20 | 0xF * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
}


// uxth: zero-extend the low halfword of (src ROR rotate) into dst.
void Assembler::uxth(Register dst, Register src, int rotate, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.276.
  // cond(31-28) | 01101111(27-20) | 1111(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6F * B20 | 0xF * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
}


// uxtah: dst = src1 + zero-extended low halfword of (src2 ROR rotate).
void Assembler::uxtah(Register dst, Register src1, Register src2, int rotate,
                      Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.273.
  // cond(31-28) | 01101111(27-20) | Rn(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6F * B20 | src1.code() * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code());
}
   2050 
   2051 
// rbit (ARMv7): reverses the bit order of src into dst (bit 0 <-> bit 31,
// bit 1 <-> bit 30, ...).
void Assembler::rbit(Register dst, Register src, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.144.
  // cond(31-28) | 011011111111(27-16) | Rd(15-12) | 11110011(11-4) | Rm(3-0)
  DCHECK(IsEnabled(ARMv7));
  DCHECK(dst != pc);
  DCHECK(src != pc);
  emit(cond | 0x6FF * B16 | dst.code() * B12 | 0xF3 * B4 | src.code());
}
   2060 
// rev: byte-reverses the 32-bit word in src into dst (endianness swap).
void Assembler::rev(Register dst, Register src, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b (REV entry;
  // NOTE(review): the A8.8.144 reference originally here looks copied from
  // rbit — verify the exact section number).
  // cond(31-28) | 011010111111(27-16) | Rd(15-12) | 11110011(11-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  emit(cond | 0x6BF * B16 | dst.code() * B12 | 0xF3 * B4 | src.code());
}
   2068 
   2069 // Status register access instructions.
// mrs: reads the status register selected by 's' (CPSR or SPSR) into dst.
void Assembler::mrs(Register dst, SRegister s, Condition cond) {
  DCHECK(dst != pc);
  emit(cond | B24 | s | 15*B16 | dst.code()*B12);
}
   2074 
   2075 
// msr: writes 'src' into the PSR fields selected by 'fields' (CPSR or SPSR
// field mask). An immediate that cannot be encoded as a rotated 8-bit value
// is first materialized into a scratch register and the call retried in
// register form.
void Assembler::msr(SRegisterFieldMask fields, const Operand& src,
                    Condition cond) {
  DCHECK_NE(fields & 0x000F0000, 0);  // At least one field must be set.
  DCHECK(((fields & 0xFFF0FFFF) == CPSR) || ((fields & 0xFFF0FFFF) == SPSR));
  Instr instr;
  if (src.IsImmediate()) {
    // Immediate.
    uint32_t rotate_imm;
    uint32_t immed_8;
    if (src.MustOutputRelocInfo(this) ||
        !FitsShifter(src.immediate(), &rotate_imm, &immed_8, nullptr)) {
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      // Immediate operand cannot be encoded, load it first to a scratch
      // register.
      Move32BitImmediate(scratch, src);
      msr(fields, Operand(scratch), cond);
      return;
    }
    instr = I | rotate_imm*B8 | immed_8;
  } else {
    DCHECK(src.IsRegister());  // Only rm is allowed.
    instr = src.rm_.code();
  }
  emit(cond | instr | B24 | B21 | fields | 15*B12);
}
   2102 
   2103 
   2104 // Load/Store instructions.
// Load/Store instructions.

// ldr: load a 32-bit word from src into dst (addressing mode 2).
void Assembler::ldr(Register dst, const MemOperand& src, Condition cond) {
  AddrMode2(cond | B26 | L, dst, src);
}


// str: store the 32-bit word in src to dst (addressing mode 2).
void Assembler::str(Register src, const MemOperand& dst, Condition cond) {
  AddrMode2(cond | B26, src, dst);
}


// ldrb: load a byte, zero-extended (B selects byte access).
void Assembler::ldrb(Register dst, const MemOperand& src, Condition cond) {
  AddrMode2(cond | B26 | B | L, dst, src);
}


// strb: store the low byte of src.
void Assembler::strb(Register src, const MemOperand& dst, Condition cond) {
  AddrMode2(cond | B26 | B, src, dst);
}


// ldrh: load a halfword, zero-extended (addressing mode 3, H bit).
void Assembler::ldrh(Register dst, const MemOperand& src, Condition cond) {
  AddrMode3(cond | L | B7 | H | B4, dst, src);
}


// strh: store the low halfword of src.
void Assembler::strh(Register src, const MemOperand& dst, Condition cond) {
  AddrMode3(cond | B7 | H | B4, src, dst);
}


// ldrsb: load a byte, sign-extended (S6 is the sign bit of the encoding).
void Assembler::ldrsb(Register dst, const MemOperand& src, Condition cond) {
  AddrMode3(cond | L | B7 | S6 | B4, dst, src);
}


// ldrsh: load a halfword, sign-extended.
void Assembler::ldrsh(Register dst, const MemOperand& src, Condition cond) {
  AddrMode3(cond | L | B7 | S6 | H | B4, dst, src);
}
   2143 
   2144 
// ldrd: load a doubleword into the consecutive even/odd register pair
// dst1/dst2. dst1 must not be lr (r14) so the implicit second register
// (dst1 + 1) is never pc. Register-offset addressing is not supported
// (src.rm() must be no_reg).
void Assembler::ldrd(Register dst1, Register dst2,
                     const MemOperand& src, Condition cond) {
  DCHECK(src.rm() == no_reg);
  DCHECK(dst1 != lr);  // r14.
  DCHECK_EQ(0, dst1.code() % 2);
  DCHECK_EQ(dst1.code() + 1, dst2.code());
  AddrMode3(cond | B7 | B6 | B4, dst1, src);
}


// strd: store a doubleword from the consecutive even/odd register pair
// src1/src2, with the same pair restrictions as ldrd.
void Assembler::strd(Register src1, Register src2,
                     const MemOperand& dst, Condition cond) {
  DCHECK(dst.rm() == no_reg);
  DCHECK(src1 != lr);  // r14.
  DCHECK_EQ(0, src1.code() % 2);
  DCHECK_EQ(src1.code() + 1, src2.code());
  AddrMode3(cond | B7 | B6 | B5 | B4, src1, dst);
}
   2163 
   2164 void Assembler::ldr_pcrel(Register dst, int imm12, Condition cond) {
   2165   AddrMode am = Offset;
   2166   if (imm12 < 0) {
   2167     imm12 = -imm12;
   2168     am = NegOffset;
   2169   }
   2170   DCHECK(is_uint12(imm12));
   2171   emit(cond | B26 | am | L | pc.code() * B16 | dst.code() * B12 | imm12);
   2172 }
   2173 
   2174 // Load/Store exclusive instructions.
// ldrex: load-exclusive word from [src] into dst.
void Assembler::ldrex(Register dst, Register src, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.75.
  // cond(31-28) | 00011001(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  emit(cond | B24 | B23 | B20 | src.code() * B16 | dst.code() * B12 | 0xF9F);
}

// strex: store-exclusive of src2 to [dst]; src1 (the Rd field) receives the
// exclusive-store status. The status register must differ from both the
// address and data registers, per the architecture constraints checked below.
void Assembler::strex(Register src1, Register src2, Register dst,
                      Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.212.
  // cond(31-28) | 00011000(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) |
  // Rt(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(src1 != dst);
  DCHECK(src1 != src2);
  emit(cond | B24 | B23 | dst.code() * B16 | src1.code() * B12 | 0xF9 * B4 |
       src2.code());
}

// ldrexb: byte variant of ldrex.
void Assembler::ldrexb(Register dst, Register src, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.76.
  // cond(31-28) | 00011101(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  emit(cond | B24 | B23 | B22 | B20 | src.code() * B16 | dst.code() * B12 |
       0xF9F);
}

// strexb: byte variant of strex; src1 receives the status.
void Assembler::strexb(Register src1, Register src2, Register dst,
                       Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.213.
  // cond(31-28) | 00011100(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) |
  // Rt(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(src1 != dst);
  DCHECK(src1 != src2);
  emit(cond | B24 | B23 | B22 | dst.code() * B16 | src1.code() * B12 |
       0xF9 * B4 | src2.code());
}

// ldrexh: halfword variant of ldrex.
void Assembler::ldrexh(Register dst, Register src, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.78.
  // cond(31-28) | 00011111(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  emit(cond | B24 | B23 | B22 | B21 | B20 | src.code() * B16 |
       dst.code() * B12 | 0xF9F);
}

// strexh: halfword variant of strex; src1 receives the status.
void Assembler::strexh(Register src1, Register src2, Register dst,
                       Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.215.
  // cond(31-28) | 00011110(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) |
  // Rt(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(src1 != dst);
  DCHECK(src1 != src2);
  emit(cond | B24 | B23 | B22 | B21 | dst.code() * B16 | src1.code() * B12 |
       0xF9 * B4 | src2.code());
}

// ldrexd: doubleword load-exclusive into the even/odd pair dst1/dst2.
// dst1 must not be lr so the implicit second register is never pc.
void Assembler::ldrexd(Register dst1, Register dst2, Register src,
                       Condition cond) {
  // cond(31-28) | 00011011(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
  DCHECK(dst1 != lr);  // r14.
  // The pair of destination registers is restricted to being an even-numbered
  // register and the odd-numbered register that immediately follows it.
  DCHECK_EQ(0, dst1.code() % 2);
  DCHECK_EQ(dst1.code() + 1, dst2.code());
  emit(cond | B24 | B23 | B21 | B20 | src.code() * B16 | dst1.code() * B12 |
       0xF9F);
}

// strexd: doubleword store-exclusive of the even/odd pair src1/src2 to
// [dst]; 'res' receives the exclusive-store status.
void Assembler::strexd(Register res, Register src1, Register src2, Register dst,
                       Condition cond) {
  // cond(31-28) | 00011010(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
  DCHECK(src1 != lr);  // r14.
  // The pair of source registers is restricted to being an even-numbered
  // register and the odd-numbered register that immediately follows it.
  DCHECK_EQ(0, src1.code() % 2);
  DCHECK_EQ(src1.code() + 1, src2.code());
  emit(cond | B24 | B23 | B21 | dst.code() * B16 | res.code() * B12 |
       0xF9 * B4 | src1.code());
}
   2266 
   2267 // Preload instructions.
   2268 void Assembler::pld(const MemOperand& address) {
   2269   // Instruction details available in ARM DDI 0406C.b, A8.8.128.
   2270   // 1111(31-28) | 0111(27-24) | U(23) | R(22) | 01(21-20) | Rn(19-16) |
   2271   // 1111(15-12) | imm5(11-07) | type(6-5) | 0(4)| Rm(3-0) |
   2272   DCHECK(address.rm() == no_reg);
   2273   DCHECK(address.am() == Offset);
   2274   int U = B23;
   2275   int offset = address.offset();
   2276   if (offset < 0) {
   2277     offset = -offset;
   2278     U = 0;
   2279   }
   2280   DCHECK_LT(offset, 4096);
   2281   emit(kSpecialCondition | B26 | B24 | U | B22 | B20 |
   2282        address.rn().code() * B16 | 0xF * B12 | offset);
   2283 }
   2284 
   2285 
   2286 // Load/Store multiple instructions.
// ldm: load the registers in the 'dst' bitmask from memory at 'base',
// using block addressing mode 'am' (ia/ib/da/db, with or without writeback).
void Assembler::ldm(BlockAddrMode am,
                    Register base,
                    RegList dst,
                    Condition cond) {
  // ABI stack constraint: ldmxx base, {..sp..}  base != sp  is not restartable.
  DCHECK(base == sp || (dst & sp.bit()) == 0);

  AddrMode4(cond | B27 | am | L, base, dst);

  // Emit the constant pool after a function return implemented by ldm ..{..pc}.
  if (cond == al && (dst & pc.bit()) != 0) {
    // There is a slight chance that the ldm instruction was actually a call,
    // in which case it would be wrong to return into the constant pool; we
    // recognize this case by checking if the emission of the pool was blocked
    // at the pc of the ldm instruction by a mov lr, pc instruction; if this is
    // the case, we emit a jump over the pool.
    CheckConstPool(true, no_const_pool_before_ == pc_offset() - kInstrSize);
  }
}
   2306 
   2307 
void Assembler::stm(BlockAddrMode am,
                    Register base,
                    RegList src,
                    Condition cond) {
  // Store the registers in src to consecutive memory words at base, using
  // block addressing mode am (L bit clear selects "store").
  AddrMode4(cond | B27 | am, base, src);
}
   2314 
   2315 
   2316 // Exception-generating instructions and debugging support.
   2317 // Stops with a non-negative code less than kNumOfWatchedStops support
   2318 // enabling/disabling and a counter feature. See simulator-arm.h .
void Assembler::stop(const char* msg, Condition cond, int32_t code) {
  // Note: msg is not used by this implementation.
#ifndef __arm__
  // Not running on real ARM hardware: encode the stop as a conditional svc
  // carrying the stop code so the simulator can intercept it.
  DCHECK_GE(code, kDefaultStopCode);
  {
    // Keep the svc and any surrounding code together; no pool in between.
    BlockConstPoolScope block_const_pool(this);
    if (code >= 0) {
      svc(kStopCode + code, cond);
    } else {
      // Negative codes (e.g. kDefaultStopCode) map to the maximum stop code.
      svc(kStopCode + kMaxStopCode, cond);
    }
  }
#else  // def __arm__
  // On hardware, use a breakpoint. bkpt is unconditional, so a non-al
  // condition is realized by branching over the bkpt when it does not hold.
  if (cond != al) {
    Label skip;
    b(&skip, NegateCondition(cond));
    bkpt(0);
    bind(&skip);
  } else {
    bkpt(0);
  }
#endif  // def __arm__
}
   2341 
   2342 void Assembler::bkpt(uint32_t imm16) {
   2343   DCHECK(is_uint16(imm16));
   2344   emit(al | B24 | B21 | (imm16 >> 4) * B8 | BKPT | (imm16 & 0xF));
   2345 }
   2346 
   2347 
   2348 void Assembler::svc(uint32_t imm24, Condition cond) {
   2349   DCHECK(is_uint24(imm24));
   2350   emit(cond | 15*B24 | imm24);
   2351 }
   2352 
   2353 
void Assembler::dmb(BarrierOption option) {
  // Data Memory Barrier with the given domain/access option.
  if (CpuFeatures::IsSupported(ARMv7)) {
    // Details available in ARM DDI 0406C.b, A8-378.
    emit(kSpecialCondition | 0x57FF * B12 | 5 * B4 | option);
  } else {
    // Pre-ARMv7 fallback: equivalent CP15 coprocessor write.
    // Details available in ARM DDI 0406C.b, B3-1750.
    // CP15DMB: CRn=c7, opc1=0, CRm=c10, opc2=5, Rt is ignored.
    mcr(p15, 0, r0, cr7, cr10, 5);
  }
}
   2364 
   2365 
void Assembler::dsb(BarrierOption option) {
  // Data Synchronization Barrier with the given domain/access option.
  if (CpuFeatures::IsSupported(ARMv7)) {
    // Details available in ARM DDI 0406C.b, A8-380.
    emit(kSpecialCondition | 0x57FF * B12 | 4 * B4 | option);
  } else {
    // Pre-ARMv7 fallback: equivalent CP15 coprocessor write.
    // Details available in ARM DDI 0406C.b, B3-1750.
    // CP15DSB: CRn=c7, opc1=0, CRm=c10, opc2=4, Rt is ignored.
    mcr(p15, 0, r0, cr7, cr10, 4);
  }
}
   2376 
   2377 
void Assembler::isb(BarrierOption option) {
  // Instruction Synchronization Barrier with the given option.
  if (CpuFeatures::IsSupported(ARMv7)) {
    // Details available in ARM DDI 0406C.b, A8-389.
    emit(kSpecialCondition | 0x57FF * B12 | 6 * B4 | option);
  } else {
    // Pre-ARMv7 fallback: equivalent CP15 coprocessor write.
    // Details available in ARM DDI 0406C.b, B3-1750.
    // CP15ISB: CRn=c7, opc1=0, CRm=c5, opc2=4, Rt is ignored.
    mcr(p15, 0, r0, cr7, cr5, 4);
  }
}
   2388 
void Assembler::csdb() {
  // Consumption of Speculative Data Barrier: emitted as a fixed encoding
  // (a hint-space instruction; a NOP on cores that do not implement it).
  // Details available in Arm Cache Speculation Side-channels white paper,
  // version 1.1, page 4.
  emit(0xE320F014);
}
   2394 
   2395 // Coprocessor instructions.
void Assembler::cdp(Coprocessor coproc,
                    int opcode_1,
                    CRegister crd,
                    CRegister crn,
                    CRegister crm,
                    int opcode_2,
                    Condition cond) {
  // Coprocessor data processing: crd = coproc-defined op(crn, crm).
  // cond(31-28) | 1110(27-24) | opc1(23-20) | CRn(19-16) | CRd(15-12) |
  // coproc(11-8) | opc2(7-5) | 0(4) | CRm(3-0)
  DCHECK(is_uint4(opcode_1) && is_uint3(opcode_2));
  emit(cond | B27 | B26 | B25 | (opcode_1 & 15)*B20 | crn.code()*B16 |
       crd.code()*B12 | coproc*B8 | (opcode_2 & 7)*B5 | crm.code());
}
   2407 
void Assembler::cdp2(Coprocessor coproc, int opcode_1, CRegister crd,
                     CRegister crn, CRegister crm, int opcode_2) {
  // CDP2: the unconditional (0b1111 condition field) variant of cdp.
  cdp(coproc, opcode_1, crd, crn, crm, opcode_2, kSpecialCondition);
}
   2412 
   2413 
void Assembler::mcr(Coprocessor coproc,
                    int opcode_1,
                    Register rd,
                    CRegister crn,
                    CRegister crm,
                    int opcode_2,
                    Condition cond) {
  // Move to coprocessor from ARM core register: coproc[crn, crm] = rd.
  // L bit (B20) clear selects the core-to-coprocessor direction (cf. mrc).
  DCHECK(is_uint3(opcode_1) && is_uint3(opcode_2));
  emit(cond | B27 | B26 | B25 | (opcode_1 & 7)*B21 | crn.code()*B16 |
       rd.code()*B12 | coproc*B8 | (opcode_2 & 7)*B5 | B4 | crm.code());
}
   2425 
void Assembler::mcr2(Coprocessor coproc, int opcode_1, Register rd,
                     CRegister crn, CRegister crm, int opcode_2) {
  // MCR2: the unconditional (0b1111 condition field) variant of mcr.
  mcr(coproc, opcode_1, rd, crn, crm, opcode_2, kSpecialCondition);
}
   2430 
   2431 
void Assembler::mrc(Coprocessor coproc,
                    int opcode_1,
                    Register rd,
                    CRegister crn,
                    CRegister crm,
                    int opcode_2,
                    Condition cond) {
  // Move to ARM core register from coprocessor: rd = coproc[crn, crm].
  // The L bit distinguishes this from mcr (coprocessor-to-core direction).
  DCHECK(is_uint3(opcode_1) && is_uint3(opcode_2));
  emit(cond | B27 | B26 | B25 | (opcode_1 & 7)*B21 | L | crn.code()*B16 |
       rd.code()*B12 | coproc*B8 | (opcode_2 & 7)*B5 | B4 | crm.code());
}
   2443 
void Assembler::mrc2(Coprocessor coproc, int opcode_1, Register rd,
                     CRegister crn, CRegister crm, int opcode_2) {
  // MRC2: the unconditional (0b1111 condition field) variant of mrc.
  mrc(coproc, opcode_1, rd, crn, crm, opcode_2, kSpecialCondition);
}
   2448 
   2449 
void Assembler::ldc(Coprocessor coproc,
                    CRegister crd,
                    const MemOperand& src,
                    LFlag l,
                    Condition cond) {
  // Load coprocessor register crd from memory; the addressing mode is
  // encoded by AddrMode5. l is the coprocessor "long transfer" flag.
  AddrMode5(cond | B27 | B26 | l | L | coproc * B8, crd, src);
}
   2457 
   2458 
void Assembler::ldc(Coprocessor coproc,
                    CRegister crd,
                    Register rn,
                    int option,
                    LFlag l,
                    Condition cond) {
  // Unindexed addressing: rn is used as-is and the 8-bit option field is
  // passed through to the coprocessor. l is the "long transfer" flag.
  DCHECK(is_uint8(option));
  emit(cond | B27 | B26 | U | l | L | rn.code()*B16 | crd.code()*B12 |
       coproc*B8 | (option & 255));
}
   2470 
void Assembler::ldc2(Coprocessor coproc, CRegister crd, const MemOperand& src,
                     LFlag l) {
  // LDC2: the unconditional (0b1111 condition field) variant of ldc.
  ldc(coproc, crd, src, l, kSpecialCondition);
}
   2475 
void Assembler::ldc2(Coprocessor coproc, CRegister crd, Register rn, int option,
                     LFlag l) {
  // LDC2 (unindexed): the unconditional variant of the unindexed ldc.
  ldc(coproc, crd, rn, option, l, kSpecialCondition);
}
   2480 
   2481 
   2482 // Support for VFP.
   2483 
void Assembler::vldr(const DwVfpRegister dst,
                     const Register base,
                     int offset,
                     const Condition cond) {
  // Ddst = MEM(Rbase + offset).
  // Instruction details available in ARM DDI 0406C.b, A8-924.
  // cond(31-28) | 1101(27-24)| U(23) | D(22) | 01(21-20) | Rbase(19-16) |
  // Vd(15-12) | 1011(11-8) | offset
  DCHECK(VfpRegisterIsAvailable(dst));
  int u = 1;
  if (offset < 0) {
    // Negating kMinInt would overflow; reject it outright.
    CHECK_NE(offset, kMinInt);
    offset = -offset;
    u = 0;  // U bit clear: the offset is subtracted from the base.
  }
  int vd, d;
  dst.split_code(&vd, &d);

  DCHECK_GE(offset, 0);
  if ((offset % 4) == 0 && (offset / 4) < 256) {
    // Word-aligned offset fits in the 8-bit immediate field (scaled by 4).
    emit(cond | 0xD*B24 | u*B23 | d*B22 | B20 | base.code()*B16 | vd*B12 |
         0xB*B8 | ((offset / 4) & 255));
  } else {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    // Larger offsets must be handled by computing the correct address in a
    // scratch register.
    DCHECK(base != scratch);
    if (u == 1) {
      add(scratch, base, Operand(offset));
    } else {
      sub(scratch, base, Operand(offset));
    }
    // Load with a zero immediate offset from the computed address.
    emit(cond | 0xD * B24 | d * B22 | B20 | scratch.code() * B16 | vd * B12 |
         0xB * B8);
  }
}
   2521 
   2522 
void Assembler::vldr(const DwVfpRegister dst,
                     const MemOperand& operand,
                     const Condition cond) {
  // Ddst = MEM(operand). Only plain offset addressing is supported.
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(operand.am_ == Offset);
  if (operand.rm().is_valid()) {
    // Register-offset form: vldr has no register-offset encoding, so first
    // compute the effective address into a scratch register.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    add(scratch, operand.rn(),
        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
    vldr(dst, scratch, 0, cond);
  } else {
    vldr(dst, operand.rn(), operand.offset(), cond);
  }
}
   2538 
   2539 
   2540 void Assembler::vldr(const SwVfpRegister dst,
   2541                      const Register base,
   2542                      int offset,
   2543                      const Condition cond) {
   2544   // Sdst = MEM(Rbase + offset).
   2545   // Instruction details available in ARM DDI 0406A, A8-628.
   2546   // cond(31-28) | 1101(27-24)| U001(23-20) | Rbase(19-16) |
   2547   // Vdst(15-12) | 1010(11-8) | offset
   2548   int u = 1;
   2549   if (offset < 0) {
   2550     offset = -offset;
   2551     u = 0;
   2552   }
   2553   int sd, d;
   2554   dst.split_code(&sd, &d);
   2555   DCHECK_GE(offset, 0);
   2556 
   2557   if ((offset % 4) == 0 && (offset / 4) < 256) {
   2558   emit(cond | u*B23 | d*B22 | 0xD1*B20 | base.code()*B16 | sd*B12 |
   2559        0xA*B8 | ((offset / 4) & 255));
   2560   } else {
   2561     // Larger offsets must be handled by computing the correct address in a
   2562     // scratch register.
   2563     UseScratchRegisterScope temps(this);
   2564     Register scratch = temps.Acquire();
   2565     DCHECK(base != scratch);
   2566     if (u == 1) {
   2567       add(scratch, base, Operand(offset));
   2568     } else {
   2569       sub(scratch, base, Operand(offset));
   2570     }
   2571     emit(cond | d * B22 | 0xD1 * B20 | scratch.code() * B16 | sd * B12 |
   2572          0xA * B8);
   2573   }
   2574 }
   2575 
   2576 
void Assembler::vldr(const SwVfpRegister dst,
                     const MemOperand& operand,
                     const Condition cond) {
  // Sdst = MEM(operand). Only plain offset addressing is supported.
  DCHECK(operand.am_ == Offset);
  if (operand.rm().is_valid()) {
    // Register-offset form: vldr has no register-offset encoding, so first
    // compute the effective address into a scratch register.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    add(scratch, operand.rn(),
        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
    vldr(dst, scratch, 0, cond);
  } else {
    vldr(dst, operand.rn(), operand.offset(), cond);
  }
}
   2591 
   2592 
void Assembler::vstr(const DwVfpRegister src,
                     const Register base,
                     int offset,
                     const Condition cond) {
  // MEM(Rbase + offset) = Dsrc.
  // Instruction details available in ARM DDI 0406C.b, A8-1082.
  // cond(31-28) | 1101(27-24)| U(23) | D(22) | 00(21-20) | Rbase(19-16) |
  // Vd(15-12) | 1011(11-8) | (offset/4)
  DCHECK(VfpRegisterIsAvailable(src));
  int u = 1;
  if (offset < 0) {
    // Negating kMinInt would overflow; reject it outright.
    CHECK_NE(offset, kMinInt);
    offset = -offset;
    u = 0;  // U bit clear: the offset is subtracted from the base.
  }
  DCHECK_GE(offset, 0);
  int vd, d;
  src.split_code(&vd, &d);

  if ((offset % 4) == 0 && (offset / 4) < 256) {
    // Word-aligned offset fits in the 8-bit immediate field (scaled by 4).
    emit(cond | 0xD*B24 | u*B23 | d*B22 | base.code()*B16 | vd*B12 | 0xB*B8 |
         ((offset / 4) & 255));
  } else {
    // Larger offsets must be handled by computing the correct address in a
    // scratch register.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    DCHECK(base != scratch);
    if (u == 1) {
      add(scratch, base, Operand(offset));
    } else {
      sub(scratch, base, Operand(offset));
    }
    // Store with a zero immediate offset to the computed address.
    emit(cond | 0xD * B24 | d * B22 | scratch.code() * B16 | vd * B12 |
         0xB * B8);
  }
}
   2630 
   2631 
void Assembler::vstr(const DwVfpRegister src,
                     const MemOperand& operand,
                     const Condition cond) {
  // MEM(operand) = Dsrc. Only plain offset addressing is supported.
  DCHECK(VfpRegisterIsAvailable(src));
  DCHECK(operand.am_ == Offset);
  if (operand.rm().is_valid()) {
    // Register-offset form: vstr has no register-offset encoding, so first
    // compute the effective address into a scratch register.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    add(scratch, operand.rn(),
        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
    vstr(src, scratch, 0, cond);
  } else {
    vstr(src, operand.rn(), operand.offset(), cond);
  }
}
   2647 
   2648 
void Assembler::vstr(const SwVfpRegister src,
                     const Register base,
                     int offset,
                     const Condition cond) {
  // MEM(Rbase + offset) = SSrc.
  // Instruction details available in ARM DDI 0406A, A8-786.
  // cond(31-28) | 1101(27-24)| U000(23-20) | Rbase(19-16) |
  // Vdst(15-12) | 1010(11-8) | (offset/4)
  int u = 1;
  if (offset < 0) {
    // Negating kMinInt would overflow; reject it outright.
    CHECK_NE(offset, kMinInt);
    offset = -offset;
    u = 0;  // U bit clear: the offset is subtracted from the base.
  }
  int sd, d;
  src.split_code(&sd, &d);
  DCHECK_GE(offset, 0);
  if ((offset % 4) == 0 && (offset / 4) < 256) {
    // Word-aligned offset fits in the 8-bit immediate field (scaled by 4).
    emit(cond | u*B23 | d*B22 | 0xD0*B20 | base.code()*B16 | sd*B12 |
         0xA*B8 | ((offset / 4) & 255));
  } else {
    // Larger offsets must be handled by computing the correct address in a
    // scratch register.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    DCHECK(base != scratch);
    if (u == 1) {
      add(scratch, base, Operand(offset));
    } else {
      sub(scratch, base, Operand(offset));
    }
    // Store with a zero immediate offset to the computed address.
    emit(cond | d * B22 | 0xD0 * B20 | scratch.code() * B16 | sd * B12 |
         0xA * B8);
  }
}
   2684 
   2685 
void Assembler::vstr(const SwVfpRegister src,
                     const MemOperand& operand,
                     const Condition cond) {
  // MEM(operand) = Ssrc. Only plain offset addressing is supported.
  DCHECK(operand.am_ == Offset);
  if (operand.rm().is_valid()) {
    // Register-offset form: vstr has no register-offset encoding, so first
    // compute the effective address into a scratch register.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    add(scratch, operand.rn(),
        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
    vstr(src, scratch, 0, cond);
  } else {
    vstr(src, operand.rn(), operand.offset(), cond);
  }
}
   2700 
void Assembler::vldm(BlockAddrMode am, Register base, DwVfpRegister first,
                     DwVfpRegister last, Condition cond) {
  // Load the consecutive D registers first..last from memory at base.
  // Instruction details available in ARM DDI 0406C.b, A8-922.
  // cond(31-28) | 110(27-25)| PUDW1(24-20) | Rbase(19-16) |
  // first(15-12) | 1011(11-8) | (count * 2)
  DCHECK_LE(first.code(), last.code());
  DCHECK(VfpRegisterIsAvailable(last));
  DCHECK(am == ia || am == ia_w || am == db_w);
  DCHECK(base != pc);

  int sd, d;
  first.split_code(&sd, &d);
  int count = last.code() - first.code() + 1;
  DCHECK_LE(count, 16);
  // The immediate field holds the number of words transferred: two per
  // double register.
  emit(cond | B27 | B26 | am | d*B22 | B20 | base.code()*B16 | sd*B12 |
       0xB*B8 | count*2);
}
   2718 
void Assembler::vstm(BlockAddrMode am, Register base, DwVfpRegister first,
                     DwVfpRegister last, Condition cond) {
  // Store the consecutive D registers first..last to memory at base.
  // Instruction details available in ARM DDI 0406C.b, A8-1080.
  // cond(31-28) | 110(27-25)| PUDW0(24-20) | Rbase(19-16) |
  // first(15-12) | 1011(11-8) | (count * 2)
  DCHECK_LE(first.code(), last.code());
  DCHECK(VfpRegisterIsAvailable(last));
  DCHECK(am == ia || am == ia_w || am == db_w);
  DCHECK(base != pc);

  int sd, d;
  first.split_code(&sd, &d);
  int count = last.code() - first.code() + 1;
  DCHECK_LE(count, 16);
  // The immediate field holds the number of words transferred: two per
  // double register.
  emit(cond | B27 | B26 | am | d*B22 | base.code()*B16 | sd*B12 |
       0xB*B8 | count*2);
}
   2736 
   2737 void Assembler::vldm(BlockAddrMode am, Register base, SwVfpRegister first,
   2738                      SwVfpRegister last, Condition cond) {
   2739   // Instruction details available in ARM DDI 0406A, A8-626.
   2740   // cond(31-28) | 110(27-25)| PUDW1(24-20) | Rbase(19-16) |
   2741   // first(15-12) | 1010(11-8) | (count/2)
   2742   DCHECK_LE(first.code(), last.code());
   2743   DCHECK(am == ia || am == ia_w || am == db_w);
   2744   DCHECK(base != pc);
   2745 
   2746   int sd, d;
   2747   first.split_code(&sd, &d);
   2748   int count = last.code() - first.code() + 1;
   2749   emit(cond | B27 | B26 | am | d*B22 | B20 | base.code()*B16 | sd*B12 |
   2750        0xA*B8 | count);
   2751 }
   2752 
   2753 void Assembler::vstm(BlockAddrMode am, Register base, SwVfpRegister first,
   2754                      SwVfpRegister last, Condition cond) {
   2755   // Instruction details available in ARM DDI 0406A, A8-784.
   2756   // cond(31-28) | 110(27-25)| PUDW0(24-20) | Rbase(19-16) |
   2757   // first(15-12) | 1011(11-8) | (count/2)
   2758   DCHECK_LE(first.code(), last.code());
   2759   DCHECK(am == ia || am == ia_w || am == db_w);
   2760   DCHECK(base != pc);
   2761 
   2762   int sd, d;
   2763   first.split_code(&sd, &d);
   2764   int count = last.code() - first.code() + 1;
   2765   emit(cond | B27 | B26 | am | d*B22 | base.code()*B16 | sd*B12 |
   2766        0xA*B8 | count);
   2767 }
   2768 
   2769 static void DoubleAsTwoUInt32(Double d, uint32_t* lo, uint32_t* hi) {
   2770   uint64_t i = d.AsUint64();
   2771 
   2772   *lo = i & 0xFFFFFFFF;
   2773   *hi = i >> 32;
   2774 }
   2775 
   2776 // Only works for little endian floating point formats.
   2777 // We don't support VFP on the mixed endian floating point platform.
static bool FitsVmovFPImmediate(Double d, uint32_t* encoding) {
  // Returns true if d is representable as a VFP VMOV immediate; on success
  // *encoding receives the 8-bit immediate split into the imm4H/imm4L fields.
  //
  // VMOV can accept an immediate of the form:
  //
  //  +/- m * 2^(-n) where 16 <= m <= 31 and 0 <= n <= 7
  //
  // The immediate is encoded using an 8-bit quantity, comprised of two
  // 4-bit fields. For an 8-bit immediate of the form:
  //
  //  [abcdefgh]
  //
  // where a is the MSB and h is the LSB, an immediate 64-bit double can be
  // created of the form:
  //
  //  [aBbbbbbb,bbcdefgh,00000000,00000000,
  //      00000000,00000000,00000000,00000000]
  //
  // where B = ~b.
  //

  uint32_t lo, hi;
  DoubleAsTwoUInt32(d, &lo, &hi);

  // The most obvious constraint is the long block of zeroes.
  if ((lo != 0) || ((hi & 0xFFFF) != 0)) {
    return false;
  }

  // Bits 61:54 must be all clear or all set.
  if (((hi & 0x3FC00000) != 0) && ((hi & 0x3FC00000) != 0x3FC00000)) {
    return false;
  }

  // Bit 62 must be NOT bit 61.
  if (((hi ^ (hi << 1)) & (0x40000000)) == 0) {
    return false;
  }

  // Create the encoded immediate in the form:
  //  [00000000,0000abcd,00000000,0000efgh]
  *encoding = (hi >> 16) & 0xF;       // Low nybble.
  *encoding |= (hi >> 4) & 0x70000;   // Low three bits of the high nybble.
  *encoding |= (hi >> 12) & 0x80000;  // Top bit of the high nybble.

  return true;
}
   2823 
void Assembler::vmov(const SwVfpRegister dst, Float32 imm) {
  // Sd = imm (32-bit float immediate).
  uint32_t enc;
  if (CpuFeatures::IsSupported(VFPv3) &&
      FitsVmovFPImmediate(Double(imm.get_scalar()), &enc)) {
    CpuFeatureScope scope(this, VFPv3);
    // The float can be encoded in the instruction.
    //
    // Sd = immediate
    // Instruction details available in ARM DDI 0406C.b, A8-936.
    // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | imm4H(19-16) |
    // Vd(15-12) | 101(11-9) | sz=0(8) | imm4L(3-0)
    int vd, d;
    dst.split_code(&vd, &d);
    emit(al | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | enc);
  } else {
    // Fallback: move the raw bit pattern through a core scratch register.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    mov(scratch, Operand(imm.get_bits()));
    vmov(dst, scratch);
  }
}
   2845 
void Assembler::vmov(const DwVfpRegister dst, Double imm,
                     const Register extra_scratch) {
  // Dd = imm (64-bit double immediate). extra_scratch, when provided,
  // allows the general case to use a single two-register vmov.
  DCHECK(VfpRegisterIsAvailable(dst));
  uint32_t enc;
  if (CpuFeatures::IsSupported(VFPv3) && FitsVmovFPImmediate(imm, &enc)) {
    CpuFeatureScope scope(this, VFPv3);
    // The double can be encoded in the instruction.
    //
    // Dd = immediate
    // Instruction details available in ARM DDI 0406C.b, A8-936.
    // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | imm4H(19-16) |
    // Vd(15-12) | 101(11-9) | sz=1(8) | imm4L(3-0)
    int vd, d;
    dst.split_code(&vd, &d);
    emit(al | 0x1D*B23 | d*B22 | 0x3*B20 | vd*B12 | 0x5*B9 | B8 | enc);
  } else {
    // Synthesise the double from ARM immediates.
    uint32_t lo, hi;
    DoubleAsTwoUInt32(imm, &lo, &hi);
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();

    if (lo == hi) {
      // Move the low and high parts of the double to a D register in one
      // instruction.
      mov(scratch, Operand(lo));
      vmov(dst, scratch, scratch);
    } else if (extra_scratch == no_reg) {
      // We only have one spare scratch register.
      mov(scratch, Operand(lo));
      vmov(NeonS32, dst, 0, scratch);
      // If the halves share their low 16 bits, reuse scratch and patch only
      // the top half with movt (ARMv7).
      if (((lo & 0xFFFF) == (hi & 0xFFFF)) && CpuFeatures::IsSupported(ARMv7)) {
        CpuFeatureScope scope(this, ARMv7);
        movt(scratch, hi >> 16);
      } else {
        mov(scratch, Operand(hi));
      }
      vmov(NeonS32, dst, 1, scratch);
    } else {
      // Move the low and high parts of the double to a D register in one
      // instruction.
      mov(scratch, Operand(lo));
      mov(extra_scratch, Operand(hi));
      vmov(dst, scratch, extra_scratch);
    }
  }
}
   2893 
void Assembler::vmov(const SwVfpRegister dst,
                     const SwVfpRegister src,
                     const Condition cond) {
  // Sd = Sm
  // Instruction details available in ARM DDI 0406B, A8-642.
  int sd, d, sm, m;
  dst.split_code(&sd, &d);
  src.split_code(&sm, &m);
  emit(cond | 0xE*B24 | d*B22 | 0xB*B20 | sd*B12 | 0xA*B8 | B6 | m*B5 | sm);
}
   2904 
   2905 
void Assembler::vmov(const DwVfpRegister dst,
                     const DwVfpRegister src,
                     const Condition cond) {
  // Dd = Dm
  // Instruction details available in ARM DDI 0406C.b, A8-938.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0000(19-16) | Vd(15-12) |
  // 101(11-9) | sz=1(8) | 0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | vd*B12 | 0x5*B9 | B8 | B6 | m*B5 |
       vm);
}
   2922 
void Assembler::vmov(const DwVfpRegister dst,
                     const Register src1,
                     const Register src2,
                     const Condition cond) {
  // Dm = <Rt,Rt2>.
  // Instruction details available in ARM DDI 0406C.b, A8-948.
  // cond(31-28) | 1100(27-24)| 010(23-21) | op=0(20) | Rt2(19-16) |
  // Rt(15-12) | 1011(11-8) | 00(7-6) | M(5) | 1(4) | Vm
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(src1 != pc && src2 != pc);
  int vm, m;
  dst.split_code(&vm, &m);
  emit(cond | 0xC*B24 | B22 | src2.code()*B16 |
       src1.code()*B12 | 0xB*B8 | m*B5 | B4 | vm);
}
   2938 
   2939 
void Assembler::vmov(const Register dst1,
                     const Register dst2,
                     const DwVfpRegister src,
                     const Condition cond) {
  // <Rt,Rt2> = Dm.
  // Instruction details available in ARM DDI 0406C.b, A8-948.
  // cond(31-28) | 1100(27-24)| 010(23-21) | op=1(20) | Rt2(19-16) |
  // Rt(15-12) | 1011(11-8) | 00(7-6) | M(5) | 1(4) | Vm
  DCHECK(VfpRegisterIsAvailable(src));
  DCHECK(dst1 != pc && dst2 != pc);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0xC*B24 | B22 | B20 | dst2.code()*B16 |
       dst1.code()*B12 | 0xB*B8 | m*B5 | B4 | vm);
}
   2955 
   2956 
void Assembler::vmov(const SwVfpRegister dst,
                     const Register src,
                     const Condition cond) {
  // Sn = Rt.
  // Instruction details available in ARM DDI 0406A, A8-642.
  // cond(31-28) | 1110(27-24)| 000(23-21) | op=0(20) | Vn(19-16) |
  // Rt(15-12) | 1010(11-8) | N(7)=0 | 00(6-5) | 1(4) | 0000(3-0)
  DCHECK(src != pc);
  int sn, n;
  dst.split_code(&sn, &n);
  emit(cond | 0xE*B24 | sn*B16 | src.code()*B12 | 0xA*B8 | n*B7 | B4);
}
   2969 
   2970 
void Assembler::vmov(const Register dst,
                     const SwVfpRegister src,
                     const Condition cond) {
  // Rt = Sn.
  // Instruction details available in ARM DDI 0406A, A8-642.
  // cond(31-28) | 1110(27-24)| 000(23-21) | op=1(20) | Vn(19-16) |
  // Rt(15-12) | 1010(11-8) | N(7)=0 | 00(6-5) | 1(4) | 0000(3-0)
  DCHECK(dst != pc);
  int sn, n;
  src.split_code(&sn, &n);
  emit(cond | 0xE*B24 | B20 | sn*B16 | dst.code()*B12 | 0xA*B8 | n*B7 | B4);
}
   2983 
// Type of data to read from or write to VFP register.
// Used as specifier in generic vcvt instruction.
// S32/U32: signed/unsigned 32-bit integer; F32/F64: single/double precision.
enum VFPType { S32, U32, F32, F64 };
   2987 
   2988 
   2989 static bool IsSignedVFPType(VFPType type) {
   2990   switch (type) {
   2991     case S32:
   2992       return true;
   2993     case U32:
   2994       return false;
   2995     default:
   2996       UNREACHABLE();
   2997   }
   2998 }
   2999 
   3000 
   3001 static bool IsIntegerVFPType(VFPType type) {
   3002   switch (type) {
   3003     case S32:
   3004     case U32:
   3005       return true;
   3006     case F32:
   3007     case F64:
   3008       return false;
   3009     default:
   3010       UNREACHABLE();
   3011   }
   3012 }
   3013 
   3014 
   3015 static bool IsDoubleVFPType(VFPType type) {
   3016   switch (type) {
   3017     case F32:
   3018       return false;
   3019     case F64:
   3020       return true;
   3021     default:
   3022       UNREACHABLE();
   3023   }
   3024 }
   3025 
   3026 
   3027 // Split five bit reg_code based on size of reg_type.
   3028 //  32-bit register codes are Vm:M
   3029 //  64-bit register codes are M:Vm
   3030 // where Vm is four bits, and M is a single bit.
   3031 static void SplitRegCode(VFPType reg_type,
   3032                          int reg_code,
   3033                          int* vm,
   3034                          int* m) {
   3035   DCHECK((reg_code >= 0) && (reg_code <= 31));
   3036   if (IsIntegerVFPType(reg_type) || !IsDoubleVFPType(reg_type)) {
   3037     SwVfpRegister::split_code(reg_code, vm, m);
   3038   } else {
   3039     DwVfpRegister::split_code(reg_code, vm, m);
   3040   }
   3041 }
   3042 
   3043 
   3044 // Encode vcvt.src_type.dst_type instruction.
static Instr EncodeVCVT(const VFPType dst_type,
                        const int dst_code,
                        const VFPType src_type,
                        const int src_code,
                        VFPConversionMode mode,
                        const Condition cond) {
  // Returns the full instruction word for vcvt.dst_type.src_type with the
  // given register codes, rounding mode, and condition.
  DCHECK(src_type != dst_type);
  int D, Vd, M, Vm;
  SplitRegCode(src_type, src_code, &Vm, &M);
  SplitRegCode(dst_type, dst_code, &Vd, &D);

  if (IsIntegerVFPType(dst_type) || IsIntegerVFPType(src_type)) {
    // Conversion between IEEE floating point and 32-bit integer.
    // Instruction details available in ARM DDI 0406B, A8.6.295.
    // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 1(19) | opc2(18-16) |
    // Vd(15-12) | 101(11-9) | sz(8) | op(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
    // Integer-to-integer is not a valid vcvt.
    DCHECK(!IsIntegerVFPType(dst_type) || !IsIntegerVFPType(src_type));

    int sz, opc2, op;

    if (IsIntegerVFPType(dst_type)) {
      // Float-to-integer: opc2 selects signedness of the result, sz the
      // source FP size, and op carries the rounding mode.
      opc2 = IsSignedVFPType(dst_type) ? 0x5 : 0x4;
      sz = IsDoubleVFPType(src_type) ? 0x1 : 0x0;
      op = mode;
    } else {
      // Integer-to-float: sz selects the destination FP size and op the
      // signedness of the source integer.
      DCHECK(IsIntegerVFPType(src_type));
      opc2 = 0x0;
      sz = IsDoubleVFPType(dst_type) ? 0x1 : 0x0;
      op = IsSignedVFPType(src_type) ? 0x1 : 0x0;
    }

    return (cond | 0xE*B24 | B23 | D*B22 | 0x3*B20 | B19 | opc2*B16 |
            Vd*B12 | 0x5*B9 | sz*B8 | op*B7 | B6 | M*B5 | Vm);
  } else {
    // Conversion between IEEE double and single precision.
    // Instruction details available in ARM DDI 0406B, A8.6.298.
    // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0111(19-16) |
    // Vd(15-12) | 101(11-9) | sz(8) | 1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
    int sz = IsDoubleVFPType(src_type) ? 0x1 : 0x0;
    return (cond | 0xE*B24 | B23 | D*B22 | 0x3*B20 | 0x7*B16 |
            Vd*B12 | 0x5*B9 | sz*B8 | B7 | B6 | M*B5 | Vm);
  }
}
   3088 
   3089 
void Assembler::vcvt_f64_s32(const DwVfpRegister dst,
                             const SwVfpRegister src,
                             VFPConversionMode mode,
                             const Condition cond) {
  // Dd = double(Sm treated as a signed 32-bit integer).
  DCHECK(VfpRegisterIsAvailable(dst));
  emit(EncodeVCVT(F64, dst.code(), S32, src.code(), mode, cond));
}
   3097 
   3098 
void Assembler::vcvt_f32_s32(const SwVfpRegister dst,
                             const SwVfpRegister src,
                             VFPConversionMode mode,
                             const Condition cond) {
  // Sd = float(Sm treated as a signed 32-bit integer).
  emit(EncodeVCVT(F32, dst.code(), S32, src.code(), mode, cond));
}
   3105 
   3106 
void Assembler::vcvt_f64_u32(const DwVfpRegister dst,
                             const SwVfpRegister src,
                             VFPConversionMode mode,
                             const Condition cond) {
  // Dd = Sm: convert an unsigned 32-bit integer to double precision.
  // Note: EncodeVCVT only encodes |mode| when the destination is an integer
  // type, so it has no effect on this instruction.
  DCHECK(VfpRegisterIsAvailable(dst));
  emit(EncodeVCVT(F64, dst.code(), U32, src.code(), mode, cond));
}
   3114 
   3115 
void Assembler::vcvt_f32_u32(const SwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  // Sd = Sm: convert an unsigned 32-bit integer to single precision.
  // Note: EncodeVCVT only encodes |mode| when the destination is an integer
  // type, so it has no effect on this instruction.
  emit(EncodeVCVT(F32, dst.code(), U32, src.code(), mode, cond));
}
   3120 
   3121 
void Assembler::vcvt_s32_f32(const SwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  // Sd = Sm: convert single precision to a signed 32-bit integer.
  // |mode| is encoded into the instruction's op bit (see EncodeVCVT).
  emit(EncodeVCVT(S32, dst.code(), F32, src.code(), mode, cond));
}
   3126 
   3127 
void Assembler::vcvt_u32_f32(const SwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  // Sd = Sm: convert single precision to an unsigned 32-bit integer.
  // |mode| is encoded into the instruction's op bit (see EncodeVCVT).
  emit(EncodeVCVT(U32, dst.code(), F32, src.code(), mode, cond));
}
   3132 
   3133 
void Assembler::vcvt_s32_f64(const SwVfpRegister dst,
                             const DwVfpRegister src,
                             VFPConversionMode mode,
                             const Condition cond) {
  // Sd = Dm: convert double precision to a signed 32-bit integer.
  // |mode| is encoded into the instruction's op bit (see EncodeVCVT).
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeVCVT(S32, dst.code(), F64, src.code(), mode, cond));
}
   3141 
   3142 
void Assembler::vcvt_u32_f64(const SwVfpRegister dst,
                             const DwVfpRegister src,
                             VFPConversionMode mode,
                             const Condition cond) {
  // Sd = Dm: convert double precision to an unsigned 32-bit integer.
  // |mode| is encoded into the instruction's op bit (see EncodeVCVT).
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeVCVT(U32, dst.code(), F64, src.code(), mode, cond));
}
   3150 
   3151 
void Assembler::vcvt_f64_f32(const DwVfpRegister dst,
                             const SwVfpRegister src,
                             VFPConversionMode mode,
                             const Condition cond) {
  // Dd = Sm: widen single precision to double precision.
  // Note: |mode| does not affect this encoding (see EncodeVCVT's
  // double<->single branch, which ignores it).
  DCHECK(VfpRegisterIsAvailable(dst));
  emit(EncodeVCVT(F64, dst.code(), F32, src.code(), mode, cond));
}
   3159 
   3160 
void Assembler::vcvt_f32_f64(const SwVfpRegister dst,
                             const DwVfpRegister src,
                             VFPConversionMode mode,
                             const Condition cond) {
  // Sd = Dm: narrow double precision to single precision.
  // Note: |mode| does not affect this encoding (see EncodeVCVT's
  // double<->single branch, which ignores it).
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeVCVT(F32, dst.code(), F64, src.code(), mode, cond));
}
   3168 
   3169 
void Assembler::vcvt_f64_s32(const DwVfpRegister dst,
                             int fraction_bits,
                             const Condition cond) {
  // Convert the signed fixed-point value held in |dst| (with |fraction_bits|
  // fractional bits) to double precision, in place. Only one register is
  // encoded: the instruction reads and writes Dd.
  // Instruction details available in ARM DDI 0406C.b, A8-874.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 1010(19-16) | Vd(15-12) |
  // 101(11-9) | sf=1(8) | sx=1(7) | 1(6) | i(5) | 0(4) | imm4(3-0)
  DCHECK(IsEnabled(VFPv3));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(fraction_bits > 0 && fraction_bits <= 32);
  int vd, d;
  dst.split_code(&vd, &d);
  // The 5-bit field 32 - fraction_bits is split across imm4 (bits 3-0) and
  // the i bit (bit 5), with the least significant bit going into i.
  int imm5 = 32 - fraction_bits;
  int i = imm5 & 1;
  int imm4 = (imm5 >> 1) & 0xF;
  emit(cond | 0xE*B24 | B23 | d*B22 | 0x3*B20 | B19 | 0x2*B16 |
       vd*B12 | 0x5*B9 | B8 | B7 | B6 | i*B5 | imm4);
}
   3187 
   3188 
void Assembler::vneg(const DwVfpRegister dst,
                     const DwVfpRegister src,
                     const Condition cond) {
  // Dd = vneg(Dm) double precision floating point negation.
  // Instruction details available in ARM DDI 0406C.b, A8-968.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0001(19-16) | Vd(15-12) |
  // 101(11-9) | sz=1(8) | 0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  // Split each register code into its 4-bit field and high bit (D/M).
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);

  emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | B16 | vd*B12 | 0x5*B9 | B8 | B6 |
       m*B5 | vm);
}
   3205 
   3206 
void Assembler::vneg(const SwVfpRegister dst, const SwVfpRegister src,
                     const Condition cond) {
  // Sd = vneg(Sm) single precision floating point negation.
  // Instruction details available in ARM DDI 0406C.b, A8-968.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0001(19-16) | Vd(15-12) |
  // 101(11-9) | sz=0(8) | 0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);

  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | B16 | vd * B12 | 0x5 * B9 |
       B6 | m * B5 | vm);
}
   3220 
   3221 
void Assembler::vabs(const DwVfpRegister dst,
                     const DwVfpRegister src,
                     const Condition cond) {
  // Dd = vabs(Dm) double precision floating point absolute value.
  // Instruction details available in ARM DDI 0406C.b, A8-524.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0000(19-16) | Vd(15-12) |
  // 101(11-9) | sz=1(8) | 1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | vd*B12 | 0x5*B9 | B8 | B7 | B6 |
       m*B5 | vm);
}
   3237 
   3238 
void Assembler::vabs(const SwVfpRegister dst, const SwVfpRegister src,
                     const Condition cond) {
  // Sd = vabs(Sm) single precision floating point absolute value.
  // Instruction details available in ARM DDI 0406C.b, A8-524.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0000(19-16) | Vd(15-12) |
  // 101(11-9) | sz=0(8) | 1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | B7 | B6 |
       m * B5 | vm);
}
   3251 
   3252 
void Assembler::vadd(const DwVfpRegister dst,
                     const DwVfpRegister src1,
                     const DwVfpRegister src2,
                     const Condition cond) {
  // Dd = vadd(Dn, Dm) double precision floating point addition.
  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-830.
  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C*B23 | d*B22 | 0x3*B20 | vn*B16 | vd*B12 | 0x5*B9 | B8 |
       n*B7 | m*B5 | vm);
}
   3274 
   3275 
void Assembler::vadd(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vadd(Sn, Sm) single precision floating point addition.
  // Sd = D:Vd; Sm=M:Vm; Sn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-830.
  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | m * B5 | vm);
}
   3292 
   3293 
void Assembler::vsub(const DwVfpRegister dst,
                     const DwVfpRegister src1,
                     const DwVfpRegister src2,
                     const Condition cond) {
  // Dd = vsub(Dn, Dm) double precision floating point subtraction.
  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-1086.
  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C*B23 | d*B22 | 0x3*B20 | vn*B16 | vd*B12 | 0x5*B9 | B8 |
       n*B7 | B6 | m*B5 | vm);
}
   3315 
   3316 
void Assembler::vsub(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vsub(Sn, Sm) single precision floating point subtraction.
  // Sd = D:Vd; Sm=M:Vm; Sn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-1086.
  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | B6 | m * B5 | vm);
}
   3333 
   3334 
void Assembler::vmul(const DwVfpRegister dst,
                     const DwVfpRegister src1,
                     const DwVfpRegister src2,
                     const Condition cond) {
  // Dd = vmul(Dn, Dm) double precision floating point multiplication.
  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-960.
  // cond(31-28) | 11100(27-23)| D(22) | 10(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C*B23 | d*B22 | 0x2*B20 | vn*B16 | vd*B12 | 0x5*B9 | B8 |
       n*B7 | m*B5 | vm);
}
   3356 
   3357 
void Assembler::vmul(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vmul(Sn, Sm) single precision floating point multiplication.
  // Sd = D:Vd; Sm=M:Vm; Sn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-960.
  // cond(31-28) | 11100(27-23)| D(22) | 10(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x2 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | m * B5 | vm);
}
   3374 
   3375 
void Assembler::vmla(const DwVfpRegister dst,
                     const DwVfpRegister src1,
                     const DwVfpRegister src2,
                     const Condition cond) {
  // Dd = vmla(Dd, Dn, Dm): double precision multiply-accumulate,
  // Dd = Dd + Dn * Dm.
  // Instruction details available in ARM DDI 0406C.b, A8-932.
  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | op=0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C*B23 | d*B22 | vn*B16 | vd*B12 | 0x5*B9 | B8 | n*B7 | m*B5 |
       vm);
}
   3395 
   3396 
void Assembler::vmla(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vmla(Sd, Sn, Sm): single precision multiply-accumulate,
  // Sd = Sd + Sn * Sm.
  // Instruction details available in ARM DDI 0406C.b, A8-932.
  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | op=0(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | n * B7 |
       m * B5 | vm);
}
   3411 
   3412 
void Assembler::vmls(const DwVfpRegister dst,
                     const DwVfpRegister src1,
                     const DwVfpRegister src2,
                     const Condition cond) {
  // Dd = vmls(Dd, Dn, Dm): double precision multiply-subtract,
  // Dd = Dd - Dn * Dm (op=1 distinguishes it from vmla).
  // Instruction details available in ARM DDI 0406C.b, A8-932.
  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | op=1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C*B23 | d*B22 | vn*B16 | vd*B12 | 0x5*B9 | B8 | n*B7 | B6 |
       m*B5 | vm);
}
   3432 
   3433 
void Assembler::vmls(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vmls(Sd, Sn, Sm): single precision multiply-subtract,
  // Sd = Sd - Sn * Sm (op=1 distinguishes it from vmla).
  // Instruction details available in ARM DDI 0406C.b, A8-932.
  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | op=1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | n * B7 |
       B6 | m * B5 | vm);
}
   3448 
   3449 
void Assembler::vdiv(const DwVfpRegister dst,
                     const DwVfpRegister src1,
                     const DwVfpRegister src2,
                     const Condition cond) {
  // Dd = vdiv(Dn, Dm) double precision floating point division.
  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-882.
  // cond(31-28) | 11101(27-23)| D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1D*B23 | d*B22 | vn*B16 | vd*B12 | 0x5*B9 | B8 | n*B7 | m*B5 |
       vm);
}
   3471 
   3472 
void Assembler::vdiv(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vdiv(Sn, Sm) single precision floating point division.
  // Sd = D:Vd; Sm=M:Vm; Sn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-882.
  // cond(31-28) | 11101(27-23)| D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | n * B7 |
       m * B5 | vm);
}
   3489 
   3490 
void Assembler::vcmp(const DwVfpRegister src1,
                     const DwVfpRegister src2,
                     const Condition cond) {
  // vcmp(Dd, Dm) double precision floating point comparison.
  // The result is written to the FPSCR status flags (read back via vmrs).
  // Instruction details available in ARM DDI 0406C.b, A8-864.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0100(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | E=0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  src1.split_code(&vd, &d);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | 0x4*B16 | vd*B12 | 0x5*B9 | B8 | B6 |
       m*B5 | vm);
}
   3507 
   3508 
void Assembler::vcmp(const SwVfpRegister src1, const SwVfpRegister src2,
                     const Condition cond) {
  // vcmp(Sd, Sm) single precision floating point comparison.
  // The result is written to the FPSCR status flags (read back via vmrs).
  // Instruction details available in ARM DDI 0406C.b, A8-864.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0100(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | E=0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  src1.split_code(&vd, &d);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x4 * B16 | vd * B12 |
       0x5 * B9 | B6 | m * B5 | vm);
}
   3522 
   3523 
void Assembler::vcmp(const DwVfpRegister src1,
                     const double src2,
                     const Condition cond) {
  // vcmp(Dd, #0.0) double precision floating point comparison.
  // Instruction details available in ARM DDI 0406C.b, A8-864.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0101(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | E=0(7) | 1(6) | 0(5) | 0(4) | 0000(3-0)
  DCHECK(VfpRegisterIsAvailable(src1));
  // The immediate form of VCMP only supports comparison against zero, so
  // |src2| must be exactly 0.0 (no immediate is actually encoded).
  DCHECK_EQ(src2, 0.0);
  int vd, d;
  src1.split_code(&vd, &d);
  emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | 0x5*B16 | vd*B12 | 0x5*B9 | B8 | B6);
}
   3537 
   3538 
void Assembler::vcmp(const SwVfpRegister src1, const float src2,
                     const Condition cond) {
  // vcmp(Sd, #0.0) single precision floating point comparison.
  // Instruction details available in ARM DDI 0406C.b, A8-864.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0101(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | E=0(7) | 1(6) | 0(5) | 0(4) | 0000(3-0)
  // The immediate form of VCMP only supports comparison against zero, so
  // |src2| must be exactly 0.0 (no immediate is actually encoded).
  DCHECK_EQ(src2, 0.0);
  int vd, d;
  src1.split_code(&vd, &d);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x5 * B16 | vd * B12 |
       0x5 * B9 | B6);
}
   3551 
void Assembler::vmaxnm(const DwVfpRegister dst, const DwVfpRegister src1,
                       const DwVfpRegister src2) {
  // Dd = vmaxnm(Dn, Dm): double precision maximum-number (ARMv8 only).
  // There is no condition parameter: this encoding is always executed.
  // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
       0x5 * B9 | B8 | n * B7 | m * B5 | vm);
}
   3567 
void Assembler::vmaxnm(const SwVfpRegister dst, const SwVfpRegister src1,
                       const SwVfpRegister src2) {
  // Sd = vmaxnm(Sn, Sm): single precision maximum-number (ARMv8 only).
  // There is no condition parameter: this encoding is always executed.
  // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | m * B5 | vm);
}
   3583 
void Assembler::vminnm(const DwVfpRegister dst, const DwVfpRegister src1,
                       const DwVfpRegister src2) {
  // Dd = vminnm(Dn, Dm): double precision minimum-number (ARMv8 only).
  // Bit 6 set distinguishes VMINNM from VMAXNM; always executed.
  // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
       0x5 * B9 | B8 | n * B7 | B6 | m * B5 | vm);
}
   3599 
void Assembler::vminnm(const SwVfpRegister dst, const SwVfpRegister src1,
                       const SwVfpRegister src2) {
  // Sd = vminnm(Sn, Sm): single precision minimum-number (ARMv8 only).
  // Bit 6 set distinguishes VMINNM from VMAXNM; always executed.
  // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | B6 | m * B5 | vm);
}
   3615 
   3616 void Assembler::vsel(Condition cond, const DwVfpRegister dst,
   3617                      const DwVfpRegister src1, const DwVfpRegister src2) {
   3618   // cond=kSpecialCondition(31-28) | 11100(27-23) | D(22) |
   3619   // vsel_cond=XX(21-20) | Vn(19-16) | Vd(15-12) | 101(11-9) | sz=1(8) | N(7) |
   3620   // 0(6) | M(5) | 0(4) | Vm(3-0)
   3621   DCHECK(IsEnabled(ARMv8));
   3622   int vd, d;
   3623   dst.split_code(&vd, &d);
   3624   int vn, n;
   3625   src1.split_code(&vn, &n);
   3626   int vm, m;
   3627   src2.split_code(&vm, &m);
   3628   int sz = 1;
   3629 
   3630   // VSEL has a special (restricted) condition encoding.
   3631   //   eq(0b0000)... -> 0b00
   3632   //   ge(0b1010)... -> 0b10
   3633   //   gt(0b1100)... -> 0b11
   3634   //   vs(0b0110)... -> 0b01
   3635   // No other conditions are supported.
   3636   int vsel_cond = (cond >> 30) & 0x3;
   3637   if ((cond != eq) && (cond != ge) && (cond != gt) && (cond != vs)) {
   3638     // We can implement some other conditions by swapping the inputs.
   3639     DCHECK((cond == ne) | (cond == lt) | (cond == le) | (cond == vc));
   3640     std::swap(vn, vm);
   3641     std::swap(n, m);
   3642   }
   3643 
   3644   emit(kSpecialCondition | 0x1C * B23 | d * B22 | vsel_cond * B20 | vn * B16 |
   3645        vd * B12 | 0x5 * B9 | sz * B8 | n * B7 | m * B5 | vm);
   3646 }
   3647 
   3648 void Assembler::vsel(Condition cond, const SwVfpRegister dst,
   3649                      const SwVfpRegister src1, const SwVfpRegister src2) {
   3650   // cond=kSpecialCondition(31-28) | 11100(27-23) | D(22) |
   3651   // vsel_cond=XX(21-20) | Vn(19-16) | Vd(15-12) | 101(11-9) | sz=0(8) | N(7) |
   3652   // 0(6) | M(5) | 0(4) | Vm(3-0)
   3653   DCHECK(IsEnabled(ARMv8));
   3654   int vd, d;
   3655   dst.split_code(&vd, &d);
   3656   int vn, n;
   3657   src1.split_code(&vn, &n);
   3658   int vm, m;
   3659   src2.split_code(&vm, &m);
   3660   int sz = 0;
   3661 
   3662   // VSEL has a special (restricted) condition encoding.
   3663   //   eq(0b0000)... -> 0b00
   3664   //   ge(0b1010)... -> 0b10
   3665   //   gt(0b1100)... -> 0b11
   3666   //   vs(0b0110)... -> 0b01
   3667   // No other conditions are supported.
   3668   int vsel_cond = (cond >> 30) & 0x3;
   3669   if ((cond != eq) && (cond != ge) && (cond != gt) && (cond != vs)) {
   3670     // We can implement some other conditions by swapping the inputs.
   3671     DCHECK((cond == ne) | (cond == lt) | (cond == le) | (cond == vc));
   3672     std::swap(vn, vm);
   3673     std::swap(n, m);
   3674   }
   3675 
   3676   emit(kSpecialCondition | 0x1C * B23 | d * B22 | vsel_cond * B20 | vn * B16 |
   3677        vd * B12 | 0x5 * B9 | sz * B8 | n * B7 | m * B5 | vm);
   3678 }
   3679 
void Assembler::vsqrt(const DwVfpRegister dst,
                      const DwVfpRegister src,
                      const Condition cond) {
  // Dd = vsqrt(Dm) double precision floating point square root.
  // Instruction details available in ARM DDI 0406C.b, A8-1058.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0001(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | 11(7-6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | B16 | vd*B12 | 0x5*B9 | B8 | 0x3*B6 |
       m*B5 | vm);
}
   3695 
   3696 
void Assembler::vsqrt(const SwVfpRegister dst, const SwVfpRegister src,
                      const Condition cond) {
  // Sd = vsqrt(Sm) single precision floating point square root.
  // Instruction details available in ARM DDI 0406C.b, A8-1058.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0001(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | 11(7-6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | B16 | vd * B12 | 0x5 * B9 |
       0x3 * B6 | m * B5 | vm);
}
   3709 
   3710 
void Assembler::vmsr(Register dst, Condition cond) {
  // VMSR FPSCR, Rt: move the contents of core register |dst| into the FPSCR.
  // Instruction details available in ARM DDI 0406A, A8-652.
  // cond(31-28) | 1110 (27-24) | 1110(23-20)| 0001 (19-16) |
  // Rt(15-12) | 1010 (11-8) | 0(7) | 00 (6-5) | 1(4) | 0000(3-0)
  emit(cond | 0xE * B24 | 0xE * B20 | B16 | dst.code() * B12 | 0xA * B8 | B4);
}
   3717 
   3718 
void Assembler::vmrs(Register dst, Condition cond) {
  // VMRS Rt, FPSCR: move the FPSCR into core register |dst|.
  // Instruction details available in ARM DDI 0406A, A8-652.
  // cond(31-28) | 1110 (27-24) | 1111(23-20)| 0001 (19-16) |
  // Rt(15-12) | 1010 (11-8) | 0(7) | 00 (6-5) | 1(4) | 0000(3-0)
  emit(cond | 0xE * B24 | 0xF * B20 | B16 | dst.code() * B12 | 0xA * B8 | B4);
}
   3725 
   3726 
void Assembler::vrinta(const SwVfpRegister dst, const SwVfpRegister src) {
  // Sd = vrinta(Sm): round to integral, ties away from zero (ARMv8 only,
  // always executed; rounding mode selected by RM=00).
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=00(17-16) |  Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | vd * B12 |
       0x5 * B9 | B6 | m * B5 | vm);
}
   3739 
   3740 
void Assembler::vrinta(const DwVfpRegister dst, const DwVfpRegister src) {
  // Dd = vrinta(Dm): round to integral, ties away from zero (ARMv8 only,
  // always executed; rounding mode selected by RM=00).
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=00(17-16) |  Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | vd * B12 |
       0x5 * B9 | B8 | B6 | m * B5 | vm);
}
   3753 
   3754 
void Assembler::vrintn(const SwVfpRegister dst, const SwVfpRegister src) {
  // Sd = vrintn(Sm): round to integral, ties to even (ARMv8 only, always
  // executed; rounding mode selected by RM=01).
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=01(17-16) |  Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x1 * B16 |
       vd * B12 | 0x5 * B9 | B6 | m * B5 | vm);
}
   3767 
   3768 
void Assembler::vrintn(const DwVfpRegister dst, const DwVfpRegister src) {
  // Dd = vrintn(Dm): round to integral, ties to even (ARMv8 only, always
  // executed; rounding mode selected by RM=01).
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=01(17-16) |  Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x1 * B16 |
       vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm);
}
   3781 
   3782 
void Assembler::vrintp(const SwVfpRegister dst, const SwVfpRegister src) {
  // Sd = vrintp(Sm): round to integral, toward plus infinity (ARMv8 only,
  // always executed; rounding mode selected by RM=10).
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=10(17-16) |  Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x2 * B16 |
       vd * B12 | 0x5 * B9 | B6 | m * B5 | vm);
}
   3795 
   3796 
void Assembler::vrintp(const DwVfpRegister dst, const DwVfpRegister src) {
  // Dd = vrintp(Dm): round to integral, toward plus infinity (ARMv8 only,
  // always executed; rounding mode selected by RM=10).
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=10(17-16) |  Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x2 * B16 |
       vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm);
}
   3809 
   3810 
void Assembler::vrintm(const SwVfpRegister dst, const SwVfpRegister src) {
  // Sd = vrintm(Sm): round to integral, toward minus infinity (ARMv8 only,
  // always executed; rounding mode selected by RM=11).
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=11(17-16) |  Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x3 * B16 |
       vd * B12 | 0x5 * B9 | B6 | m * B5 | vm);
}
   3823 
   3824 
void Assembler::vrintm(const DwVfpRegister dst, const DwVfpRegister src) {
  // Dd = vrintm(Dm): round to integral, toward minus infinity (ARMv8 only,
  // always executed; rounding mode selected by RM=11).
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=11(17-16) |  Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x3 * B16 |
       vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm);
}
   3837 
   3838 
void Assembler::vrintz(const SwVfpRegister dst, const SwVfpRegister src,
                       const Condition cond) {
  // Sd = vrintz(Sm). ARMv8 VFP round toward zero (op=1). Unlike the
  // vrintp/vrintm encodings above this one is conditional.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 011(19-17) | 0(16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | op=1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x3 * B17 | vd * B12 |
       0x5 * B9 | B7 | B6 | m * B5 | vm);
}
   3851 
   3852 
void Assembler::vrintz(const DwVfpRegister dst, const DwVfpRegister src,
                       const Condition cond) {
  // Dd = vrintz(Dm). Double-precision variant (sz=1) of the conditional
  // ARMv8 round-toward-zero instruction above.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 011(19-17) | 0(16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | op=1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x3 * B17 | vd * B12 |
       0x5 * B9 | B8 | B7 | B6 | m * B5 | vm);
}
   3865 
   3866 
   3867 // Support for NEON.
   3868 
void Assembler::vld1(NeonSize size,
                     const NeonListOperand& dst,
                     const NeonMemOperand& src) {
  // NEON multiple-single-element structure load: fills the register list
  // `dst` from memory at `src` (base Rn, optional alignment, post-index Rm).
  // Instruction details available in ARM DDI 0406C.b, A8.8.320.
  // 1111(31-28) | 01000(27-23) | D(22) | 10(21-20) | Rn(19-16) |
  // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0)
  DCHECK(IsEnabled(NEON));
  int vd, d;
  dst.base().split_code(&vd, &d);
  emit(0xFU*B28 | 4*B24 | d*B22 | 2*B20 | src.rn().code()*B16 | vd*B12 |
       dst.type()*B8 | size*B6 | src.align()*B4 | src.rm().code());
}
   3881 
void Assembler::vst1(NeonSize size, const NeonListOperand& src,
                     const NeonMemOperand& dst) {
  // NEON multiple-single-element structure store: mirror of vld1 above, with
  // bits 21-20 = 00 instead of 10.
  // Instruction details available in ARM DDI 0406C.b, A8.8.404.
  // 1111(31-28) | 01000(27-23) | D(22) | 00(21-20) | Rn(19-16) |
  // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0)
  DCHECK(IsEnabled(NEON));
  int vd, d;
  src.base().split_code(&vd, &d);
  emit(0xFU*B28 | 4*B24 | d*B22 | dst.rn().code()*B16 | vd*B12 | src.type()*B8 |
       size*B6 | dst.align()*B4 | dst.rm().code());
}
   3893 
   3894 
void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) {
  // Qd = vmovl.<dt>(Dm). Vector lengthening move: each element of Dm is
  // widened into Qd; the U bit (from dt) selects zero- vs sign-extension and
  // imm3 is a one-hot encoding of the source element size.
  // Instruction details available in ARM DDI 0406C.b, A8.8.346.
  // 1111(31-28) | 001(27-25) | U(24) | 1(23) | D(22) | imm3(21-19) |
  // 000(18-16) | Vd(15-12) | 101000(11-6) | M(5) | 1(4) | Vm(3-0)
  DCHECK(IsEnabled(NEON));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  int U = NeonU(dt);
  int imm3 = 1 << NeonSz(dt);
  emit(0xFU * B28 | B25 | U * B24 | B23 | d * B22 | imm3 * B19 | vd * B12 |
       0xA * B8 | m * B5 | B4 | vm);
}
   3909 
void Assembler::vqmovn(NeonDataType dt, DwVfpRegister dst, QwNeonRegister src) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.1004.
  // vqmovn.<type><size> Dd, Qm. ARM vector narrowing move with saturation.
  DCHECK(IsEnabled(NEON));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  int size = NeonSz(dt);
  int u = NeonU(dt);
  // op field: 0b10 = signed saturation, 0b11 = unsigned saturation.
  int op = u != 0 ? 3 : 2;
  emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | 0x2 * B16 | vd * B12 |
       0x2 * B8 | op * B6 | m * B5 | vm);
}
   3924 
// Computes the opc1/opc2 bit pattern that selects scalar element `index` of
// element type `dt` for the vmov to/from-scalar encodings below. DCHECKs that
// the index is in range for the element size (8 lanes for bytes, 4 for
// halfwords, 2 for words in a D register).
static int EncodeScalar(NeonDataType dt, int index) {
  int opc1_opc2 = 0;
  DCHECK_LE(0, index);
  switch (dt) {
    case NeonS8:
    case NeonU8:
      DCHECK_GT(8, index);
      opc1_opc2 = 0x8 | index;
      break;
    case NeonS16:
    case NeonU16:
      DCHECK_GT(4, index);
      opc1_opc2 = 0x1 | (index << 1);
      break;
    case NeonS32:
    case NeonU32:
      DCHECK_GT(2, index);
      opc1_opc2 = index << 2;
      break;
    default:
      UNREACHABLE();
      break;
  }
  // opc1 occupies bits 22-21, opc2 occupies bits 6-5 of the instruction.
  return (opc1_opc2 >> 2) * B21 | (opc1_opc2 & 0x3) * B5;
}
   3950 
void Assembler::vmov(NeonDataType dt, DwVfpRegister dst, int index,
                     Register src) {
  // Dd[index] = Rt. Moves an ARM core register into one scalar lane of a
  // D register; lane/size selection comes from EncodeScalar.
  // Instruction details available in ARM DDI 0406C.b, A8.8.940.
  // vmov ARM core register to scalar.
  DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON));
  int vd, d;
  dst.split_code(&vd, &d);
  int opc1_opc2 = EncodeScalar(dt, index);
  emit(0xEEu * B24 | vd * B16 | src.code() * B12 | 0xB * B8 | d * B7 | B4 |
       opc1_opc2);
}
   3962 
void Assembler::vmov(NeonDataType dt, Register dst, DwVfpRegister src,
                     int index) {
  // Rt = Dn[index]. Moves one scalar lane of a D register into an ARM core
  // register; the U bit (from dt) selects zero- vs sign-extension.
  // Instruction details available in ARM DDI 0406C.b, A8.8.942.
  // vmov Arm scalar to core register.
  DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON));
  int vn, n;
  src.split_code(&vn, &n);
  int opc1_opc2 = EncodeScalar(dt, index);
  int u = NeonU(dt);
  emit(0xEEu * B24 | u * B23 | B20 | vn * B16 | dst.code() * B12 | 0xB * B8 |
       n * B7 | B4 | opc1_opc2);
}
   3975 
void Assembler::vmov(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = Qm register move.
  // Instruction details available in ARM DDI 0406C.b, A8-938.
  // vmov is encoded as vorr (vorr Qd, Qm, Qm is a bitwise copy).
  vorr(dst, src, src);
}
   3981 
void Assembler::vdup(NeonSize size, QwNeonRegister dst, Register src) {
  // Qd = vdup.<size>(Rt). Duplicates the low 8/16/32 bits of an ARM core
  // register into every lane of Qd.
  DCHECK(IsEnabled(NEON));
  // Instruction details available in ARM DDI 0406C.b, A8-886.
  // The B and E bits together encode the element size: B=1 -> 8-bit,
  // E=1 -> 16-bit, both zero -> 32-bit.
  int B = 0, E = 0;
  switch (size) {
    case Neon8:
      B = 1;
      break;
    case Neon16:
      E = 1;
      break;
    case Neon32:
      break;
    default:
      UNREACHABLE();
      break;
  }
  int vd, d;
  dst.split_code(&vd, &d);

  emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 |
       0xB * B8 | d * B7 | E * B5 | B4);
}
   4005 
// Distinguishes 64-bit D registers from 128-bit Q registers when encoding.
enum NeonRegType { NEON_D, NEON_Q };
   4007 
// Splits a D/Q register code into its Vm field and top (m) bit, dispatching
// on the register kind. For Q registers the Q bit (B6) is also OR-ed into
// *encoding, which is how callers mark an instruction as quad-width.
void NeonSplitCode(NeonRegType type, int code, int* vm, int* m, int* encoding) {
  if (type == NEON_D) {
    DwVfpRegister::split_code(code, vm, m);
  } else {
    DCHECK_EQ(type, NEON_Q);
    QwNeonRegister::split_code(code, vm, m);
    *encoding |= B6;
  }
}
   4017 
// Encodes vdup.<size> (scalar form): duplicate lane `index` of D register
// `src` into all lanes of the destination (D or Q, per reg_type). imm4 packs
// the element size (one-hot low bits) with the lane index above it.
static Instr EncodeNeonDupOp(NeonSize size, NeonRegType reg_type, int dst_code,
                             DwVfpRegister src, int index) {
  DCHECK_NE(Neon64, size);
  int sz = static_cast<int>(size);
  DCHECK_LE(0, index);
  // The lane index must address within the 128-bit vector.
  DCHECK_GT(kSimd128Size / (1 << sz), index);
  int imm4 = (1 << sz) | ((index << (sz + 1)) & 0xF);
  int qbit = 0;
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &qbit);
  int vm, m;
  src.split_code(&vm, &m);

  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 |
         0x18 * B7 | qbit | m * B5 | vm;
}
   4034 
void Assembler::vdup(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
                     int index) {
  // Dd = vdup.<size>(Dm[index]). Duplicates one lane of Dm across Dd.
  DCHECK(IsEnabled(NEON));
  // Instruction details available in ARM DDI 0406C.b, A8-884.
  emit(EncodeNeonDupOp(size, NEON_D, dst.code(), src, index));
}
   4041 
void Assembler::vdup(NeonSize size, QwNeonRegister dst, DwVfpRegister src,
                     int index) {
  // Qd = vdup.<size>(Dm[index]). Duplicates one lane of Dm across Qd.
  // Instruction details available in ARM DDI 0406C.b, A8-884.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonDupOp(size, NEON_Q, dst.code(), src, index));
}
   4048 
// Encode NEON vcvt.src_type.dst_type instruction.
// Exactly one of src/dst is F32; the other is S32 or U32. The 2-bit op field
// selects the conversion direction and signedness.
static Instr EncodeNeonVCVT(VFPType dst_type, QwNeonRegister dst,
                            VFPType src_type, QwNeonRegister src) {
  DCHECK(src_type != dst_type);
  DCHECK(src_type == F32 || dst_type == F32);
  // Instruction details available in ARM DDI 0406C.b, A8.8.868.
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);

  int op = 0;
  if (src_type == F32) {
    // Float -> integer: op = 2 (signed) or 3 (unsigned).
    DCHECK(dst_type == S32 || dst_type == U32);
    op = dst_type == U32 ? 3 : 2;
  } else {
    // Integer -> float: op = 0 (signed) or 1 (unsigned).
    DCHECK(src_type == S32 || src_type == U32);
    op = src_type == U32 ? 1 : 0;
  }

  return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x3 * B9 | op * B7 |
         B6 | m * B5 | vm;
}
   4072 
void Assembler::vcvt_f32_s32(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vcvt.f32.s32(Qm): per-lane signed int -> float conversion.
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeNeonVCVT(F32, dst, S32, src));
}
   4079 
void Assembler::vcvt_f32_u32(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vcvt.f32.u32(Qm): per-lane unsigned int -> float conversion.
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeNeonVCVT(F32, dst, U32, src));
}
   4086 
void Assembler::vcvt_s32_f32(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vcvt.s32.f32(Qm): per-lane float -> signed int conversion.
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeNeonVCVT(S32, dst, F32, src));
}
   4093 
void Assembler::vcvt_u32_f32(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vcvt.u32.f32(Qm): per-lane float -> unsigned int conversion.
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeNeonVCVT(U32, dst, F32, src));
}
   4100 
// One-operand NEON ops; the *F variants are the floating-point forms.
enum UnaryOp { VMVN, VSWP, VABS, VABSF, VNEG, VNEGF };
   4102 
// Encodes the shared "two-register misc" NEON format used by the one-operand
// ops above. `size` feeds bits 19-18; ops without a meaningful size must pass
// Neon8 (i.e. 0), which the DCHECKs enforce.
static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
                               int dst_code, int src_code) {
  int op_encoding = 0;
  switch (op) {
    case VMVN:
      DCHECK_EQ(Neon8, size);  // size == 0 for vmvn
      op_encoding = B10 | 0x3 * B7;
      break;
    case VSWP:
      DCHECK_EQ(Neon8, size);  // size == 0 for vswp
      op_encoding = B17;
      break;
    case VABS:
      op_encoding = B16 | 0x6 * B7;
      break;
    case VABSF:
      DCHECK_EQ(Neon32, size);
      op_encoding = B16 | B10 | 0x6 * B7;
      break;
    case VNEG:
      op_encoding = B16 | 0x7 * B7;
      break;
    case VNEGF:
      DCHECK_EQ(Neon32, size);
      op_encoding = B16 | B10 | 0x7 * B7;
      break;
    default:
      UNREACHABLE();
      break;
  }
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);

  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | vd * B12 | m * B5 |
         vm | op_encoding;
}
   4141 
void Assembler::vmvn(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vmvn(Qm) SIMD bitwise NOT (complement every bit of Qm).
  // Instruction details available in ARM DDI 0406C.b, A8-966.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VMVN, NEON_Q, Neon8, dst.code(), src.code()));
}
   4148 
   4149 void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) {
   4150   DCHECK(IsEnabled(NEON));
   4151   // Dd = vswp(Dn, Dm) SIMD d-register swap.
   4152   // Instruction details available in ARM DDI 0406C.b, A8.8.418.
   4153   DCHECK(IsEnabled(NEON));
   4154   emit(EncodeNeonUnaryOp(VSWP, NEON_D, Neon8, dst.code(), src.code()));
   4155 }
   4156 
void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vswp(Qm) SIMD q-register swap: exchanges the contents of the two
  // registers (size field is 0 for vswp, hence Neon8).
  // Instruction details available in ARM DDI 0406C.b, A8.8.418.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VSWP, NEON_Q, Neon8, dst.code(), src.code()));
}
   4163 
void Assembler::vabs(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vabs.f32(Qm) SIMD floating point absolute value, per lane.
  // Instruction details available in ARM DDI 0406C.b, A8.8.824.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VABSF, NEON_Q, Neon32, dst.code(), src.code()));
}
   4170 
void Assembler::vabs(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vabs.s<size>(Qm) SIMD integer absolute value, per lane.
  // Instruction details available in ARM DDI 0406C.b, A8.8.824.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VABS, NEON_Q, size, dst.code(), src.code()));
}
   4177 
void Assembler::vneg(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vneg.f32(Qm) SIMD floating point negate, per lane.
  // Instruction details available in ARM DDI 0406C.b, A8.8.968.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VNEGF, NEON_Q, Neon32, dst.code(), src.code()));
}
   4184 
void Assembler::vneg(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vneg.s<size>(Qm) SIMD integer negate, per lane.
  // Instruction details available in ARM DDI 0406C.b, A8.8.968.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VNEG, NEON_Q, size, dst.code(), src.code()));
}
   4191 
// Two-operand NEON bitwise ops sharing one encoding scheme (see below).
enum BinaryBitwiseOp { VAND, VBIC, VBIF, VBIT, VBSL, VEOR, VORR, VORN };
   4193 
// Encodes the NEON three-register bitwise ops. They differ only in the U bit
// (B24) and the 2-bit size field at bits 21-20, captured in op_encoding.
static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op, NeonRegType reg_type,
                                       int dst_code, int src_code1,
                                       int src_code2) {
  int op_encoding = 0;
  switch (op) {
    case VBIC:
      op_encoding = 0x1 * B20;
      break;
    case VBIF:
      op_encoding = B24 | 0x3 * B20;
      break;
    case VBIT:
      op_encoding = B24 | 0x2 * B20;
      break;
    case VBSL:
      op_encoding = B24 | 0x1 * B20;
      break;
    case VEOR:
      op_encoding = B24;
      break;
    case VORR:
      op_encoding = 0x2 * B20;
      break;
    case VORN:
      op_encoding = 0x3 * B20;
      break;
    case VAND:
      // op_encoding is 0.
      break;
    default:
      UNREACHABLE();
      break;
  }
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vn, n;
  NeonSplitCode(reg_type, src_code1, &vn, &n, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code2, &vm, &m, &op_encoding);

  return 0x1E4U * B23 | op_encoding | d * B22 | vn * B16 | vd * B12 | B8 |
         n * B7 | m * B5 | B4 | vm;
}
   4237 
void Assembler::vand(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = vand(Qn, Qm) SIMD bitwise AND.
  // Instruction details available in ARM DDI 0406C.b, A8.8.836.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VAND, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}
   4246 
void Assembler::vbsl(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = vbsl(Qn, Qm) SIMD bitwise select: Qd acts as the bit mask choosing
  // between Qn and Qm.
  // Instruction details available in ARM DDI 0406C.b, A8-844.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VBSL, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}
   4255 
void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,
                     DwVfpRegister src2) {
  // Dd = veor(Dn, Dm) SIMD exclusive OR, 64-bit registers.
  // Instruction details available in ARM DDI 0406C.b, A8.8.888.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VEOR, NEON_D, dst.code(), src1.code(),
                                 src2.code()));
}
   4264 
void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = veor(Qn, Qm) SIMD exclusive OR, 128-bit registers.
  // Instruction details available in ARM DDI 0406C.b, A8.8.888.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VEOR, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}
   4273 
void Assembler::vorr(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = vorr(Qn, Qm) SIMD bitwise OR (also used to encode vmov Qd, Qm).
  // Instruction details available in ARM DDI 0406C.b, A8.8.976.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VORR, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}
   4282 
// Three-register NEON floating-point ops sharing one encoding scheme.
enum FPBinOp {
  VADDF,
  VSUBF,
  VMULF,
  VMINF,
  VMAXF,
  VRECPS,
  VRSQRTS,
  VCEQF,
  VCGEF,
  VCGTF
};
   4295 
// Encodes a three-register NEON floating-point op (Qd = op(Qn, Qm)). Each op
// differs only in the U bit (B24), the op bit at B21, the opcode nibble at
// bits 11-8, and the low B4 bit, all folded into op_encoding.
static Instr EncodeNeonBinOp(FPBinOp op, QwNeonRegister dst,
                             QwNeonRegister src1, QwNeonRegister src2) {
  int op_encoding = 0;
  switch (op) {
    case VADDF:
      op_encoding = 0xD * B8;
      break;
    case VSUBF:
      op_encoding = B21 | 0xD * B8;
      break;
    case VMULF:
      op_encoding = B24 | 0xD * B8 | B4;
      break;
    case VMINF:
      op_encoding = B21 | 0xF * B8;
      break;
    case VMAXF:
      op_encoding = 0xF * B8;
      break;
    case VRECPS:
      op_encoding = 0xF * B8 | B4;
      break;
    case VRSQRTS:
      op_encoding = B21 | 0xF * B8 | B4;
      break;
    case VCEQF:
      op_encoding = 0xE * B8;
      break;
    case VCGEF:
      op_encoding = B24 | 0xE * B8;
      break;
    case VCGTF:
      op_encoding = B24 | B21 | 0xE * B8;
      break;
    default:
      UNREACHABLE();
      break;
  }
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  return 0x1E4U * B23 | d * B22 | vn * B16 | vd * B12 | n * B7 | B6 | m * B5 |
         vm | op_encoding;
}
   4343 
// Three-register NEON integer ops sharing one encoding scheme.
enum IntegerBinOp {
  VADD,
  VQADD,
  VSUB,
  VQSUB,
  VMUL,
  VMIN,
  VMAX,
  VTST,
  VCEQ,
  VCGE,
  VCGT
};
   4357 
// Encodes a three-register NEON integer op (Qd = op.<dt>(Qn, Qm)). The
// element size and the U (unsigned) bit are derived from dt; each op supplies
// its opcode nibble / B4 / B24 bits via op_encoding.
static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt,
                             QwNeonRegister dst, QwNeonRegister src1,
                             QwNeonRegister src2) {
  int op_encoding = 0;
  switch (op) {
    case VADD:
      op_encoding = 0x8 * B8;
      break;
    case VQADD:
      op_encoding = B4;
      break;
    case VSUB:
      op_encoding = B24 | 0x8 * B8;
      break;
    case VQSUB:
      op_encoding = 0x2 * B8 | B4;
      break;
    case VMUL:
      op_encoding = 0x9 * B8 | B4;
      break;
    case VMIN:
      op_encoding = 0x6 * B8 | B4;
      break;
    case VMAX:
      op_encoding = 0x6 * B8;
      break;
    case VTST:
      op_encoding = 0x8 * B8 | B4;
      break;
    case VCEQ:
      op_encoding = B24 | 0x8 * B8 | B4;
      break;
    case VCGE:
      op_encoding = 0x3 * B8 | B4;
      break;
    case VCGT:
      op_encoding = 0x3 * B8;
      break;
    default:
      UNREACHABLE();
      break;
  }
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int size = NeonSz(dt);
  int u = NeonU(dt);
  return 0x1E4U * B23 | u * B24 | d * B22 | size * B20 | vn * B16 | vd * B12 |
         n * B7 | B6 | m * B5 | vm | op_encoding;
}
   4411 
// Convenience overload for ops where signedness is irrelevant.
static Instr EncodeNeonBinOp(IntegerBinOp op, NeonSize size, QwNeonRegister dst,
                             QwNeonRegister src1, QwNeonRegister src2) {
  // Map NeonSize values to the signed values in NeonDataType, so the U bit
  // will be 0.
  return EncodeNeonBinOp(op, static_cast<NeonDataType>(size), dst, src1, src2);
}
   4418 
void Assembler::vadd(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vadd(Qn, Qm) SIMD floating point addition.
  // Instruction details available in ARM DDI 0406C.b, A8-830.
  emit(EncodeNeonBinOp(VADDF, dst, src1, src2));
}
   4426 
void Assembler::vadd(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vadd(Qn, Qm) SIMD integer addition, element size given by `size`.
  // Instruction details available in ARM DDI 0406C.b, A8-828.
  emit(EncodeNeonBinOp(VADD, size, dst, src1, src2));
}
   4434 
void Assembler::vqadd(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                      QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vqadd(Qn, Qm) SIMD integer saturating addition.
  // Instruction details available in ARM DDI 0406C.b, A8-996.
  emit(EncodeNeonBinOp(VQADD, dt, dst, src1, src2));
}
   4442 
void Assembler::vsub(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vsub(Qn, Qm) SIMD floating point subtraction.
  // Instruction details available in ARM DDI 0406C.b, A8-1086.
  emit(EncodeNeonBinOp(VSUBF, dst, src1, src2));
}
   4450 
void Assembler::vsub(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vsub(Qn, Qm) SIMD integer subtraction, element size given by `size`.
  // Instruction details available in ARM DDI 0406C.b, A8-1084.
  emit(EncodeNeonBinOp(VSUB, size, dst, src1, src2));
}
   4458 
void Assembler::vqsub(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                      QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vqsub(Qn, Qm) SIMD integer saturating subtraction.
  // Instruction details available in ARM DDI 0406C.b, A8-1020.
  emit(EncodeNeonBinOp(VQSUB, dt, dst, src1, src2));
}
   4466 
void Assembler::vmul(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmul(Qn, Qm) SIMD floating point multiply.
  // Instruction details available in ARM DDI 0406C.b, A8-958.
  emit(EncodeNeonBinOp(VMULF, dst, src1, src2));
}
   4474 
void Assembler::vmul(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmul(Qn, Qm) SIMD integer multiply, element size given by `size`.
  // Instruction details available in ARM DDI 0406C.b, A8-960.
  emit(EncodeNeonBinOp(VMUL, size, dst, src1, src2));
}
   4482 
void Assembler::vmin(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmin(Qn, Qm) SIMD floating point MIN.
  // Instruction details available in ARM DDI 0406C.b, A8-928.
  emit(EncodeNeonBinOp(VMINF, dst, src1, src2));
}
   4490 
void Assembler::vmin(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmin(Qn, Qm) SIMD integer MIN; signedness/size come from dt.
  // Instruction details available in ARM DDI 0406C.b, A8-926.
  emit(EncodeNeonBinOp(VMIN, dt, dst, src1, src2));
}
   4498 
void Assembler::vmax(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmax(Qn, Qm) SIMD floating point MAX.
  // Instruction details available in ARM DDI 0406C.b, A8-928.
  emit(EncodeNeonBinOp(VMAXF, dst, src1, src2));
}
   4506 
void Assembler::vmax(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmax(Qn, Qm) SIMD integer MAX; signedness/size come from dt.
  // Instruction details available in ARM DDI 0406C.b, A8-926.
  emit(EncodeNeonBinOp(VMAX, dt, dst, src1, src2));
}
   4514 
// NEON immediate-shift ops sharing one encoding scheme (see below).
enum NeonShiftOp { VSHL, VSHR, VSLI, VSRI };
   4516 
// Encodes a NEON shift-by-immediate op. The element size and shift amount are
// folded together into the imm6 field: left shifts use size_in_bits + shift,
// right shifts use 2 * size_in_bits - shift (per the ARM ARM). For VSLI/VSRI
// the seventh bit overflows into L (B7).
static Instr EncodeNeonShiftOp(NeonShiftOp op, NeonSize size, bool is_unsigned,
                               NeonRegType reg_type, int dst_code, int src_code,
                               int shift) {
  int imm6 = 0;
  int size_in_bits = kBitsPerByte << static_cast<int>(size);
  int op_encoding = 0;
  switch (op) {
    case VSHL: {
      DCHECK(shift >= 0 && size_in_bits > shift);
      imm6 = size_in_bits + shift;
      op_encoding = 0x5 * B8;
      break;
    }
    case VSHR: {
      // A right shift of 0 is not encodable; shift may equal the lane width.
      DCHECK(shift > 0 && size_in_bits >= shift);
      imm6 = 2 * size_in_bits - shift;
      if (is_unsigned) op_encoding |= B24;
      break;
    }
    case VSLI: {
      DCHECK(shift >= 0 && size_in_bits > shift);
      imm6 = size_in_bits + shift;
      int L = imm6 >> 6;
      imm6 &= 0x3F;
      op_encoding = B24 | 0x5 * B8 | L * B7;
      break;
    }
    case VSRI: {
      DCHECK(shift > 0 && size_in_bits >= shift);
      imm6 = 2 * size_in_bits - shift;
      int L = imm6 >> 6;
      imm6 &= 0x3F;
      op_encoding = B24 | 0x4 * B8 | L * B7;
      break;
    }
    default:
      UNREACHABLE();
      break;
  }

  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);

  return 0x1E5U * B23 | d * B22 | imm6 * B16 | vd * B12 | m * B5 | B4 | vm |
         op_encoding;
}
   4565 
void Assembler::vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Qd = vshl(Qm, bits) SIMD shift left immediate (signedness is irrelevant
  // for a left shift, so is_unsigned is passed as false).
  // Instruction details available in ARM DDI 0406C.b, A8-1046.
  emit(EncodeNeonShiftOp(VSHL, NeonDataTypeToSize(dt), false, NEON_Q,
                         dst.code(), src.code(), shift));
}
   4574 
   4575 void Assembler::vshr(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
   4576                      int shift) {
   4577   DCHECK(IsEnabled(NEON));
   4578   // Qd = vshl(Qm, bits) SIMD shift right immediate.
   4579   // Instruction details available in ARM DDI 0406C.b, A8-1052.
   4580   emit(EncodeNeonShiftOp(VSHR, NeonDataTypeToSize(dt), NeonU(dt), NEON_Q,
   4581                          dst.code(), src.code(), shift));
   4582 }
   4583 
void Assembler::vsli(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Dd = vsli(Dm, bits) SIMD shift left and insert (bits below the shifted-in
  // region of Dd are preserved).
  // Instruction details available in ARM DDI 0406C.b, A8-1056.
  emit(EncodeNeonShiftOp(VSLI, size, false, NEON_D, dst.code(), src.code(),
                         shift));
}
   4592 
void Assembler::vsri(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Dd = vsri(Dm, bits) SIMD shift right and insert (bits above the
  // shifted-in region of Dd are preserved).
  // Instruction details available in ARM DDI 0406C.b, A8-1062.
  emit(EncodeNeonShiftOp(VSRI, size, false, NEON_D, dst.code(), src.code(),
                         shift));
}
   4601 
// Encodes vrecpe/vrsqrte (Qd = estimate(Qm)); the two differ only in the
// bit at B7 (0 = reciprocal estimate, 1 = reciprocal sqrt estimate).
static Instr EncodeNeonEstimateOp(bool is_rsqrt, QwNeonRegister dst,
                                  QwNeonRegister src) {
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  int rsqrt = is_rsqrt ? 1 : 0;
  return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x5 * B8 |
         rsqrt * B7 | B6 | m * B5 | vm;
}
   4612 
void Assembler::vrecpe(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrecpe(Qm) SIMD reciprocal estimate.
  // Instruction details available in ARM DDI 0406C.b, A8-1024.
  emit(EncodeNeonEstimateOp(false, dst, src));
}
   4619 
void Assembler::vrsqrte(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrsqrte(Qm) SIMD reciprocal square root estimate.
  // Instruction details available in ARM DDI 0406C.b, A8-1038.
  emit(EncodeNeonEstimateOp(true, dst, src));
}
   4626 
void Assembler::vrecps(QwNeonRegister dst, QwNeonRegister src1,
                       QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrecps(Qn, Qm) SIMD Newton-Raphson reciprocal refinement step,
  // used to improve the result of vrecpe above.
  // Instruction details available in ARM DDI 0406C.b, A8-1026.
  emit(EncodeNeonBinOp(VRECPS, dst, src1, src2));
}
   4634 
   4635 void Assembler::vrsqrts(QwNeonRegister dst, QwNeonRegister src1,
   4636                         QwNeonRegister src2) {
   4637   DCHECK(IsEnabled(NEON));
   4638   // Qd = vrsqrts(Qn, Qm) SIMD reciprocal square root refinement step.
   4639   // Instruction details available in ARM DDI 0406C.b, A8-1040.
   4640   emit(EncodeNeonBinOp(VRSQRTS, dst, src1, src2));
   4641 }
   4642 
// NEON pairwise integer operations sharing the three-register encoding below.
enum NeonPairwiseOp { VPADD, VPMIN, VPMAX };

// Encodes a NEON pairwise operation on D registers. |dt| supplies the
// element size (placed at B20) and, for vpmin/vpmax, the U (unsigned) bit
// at B24 via NeonU.
static Instr EncodeNeonPairwiseOp(NeonPairwiseOp op, NeonDataType dt,
                                  DwVfpRegister dst, DwVfpRegister src1,
                                  DwVfpRegister src2) {
  // Opcode/B4 bits that distinguish the three pairwise ops.
  int op_encoding = 0;
  switch (op) {
    case VPADD:
      op_encoding = 0xB * B8 | B4;
      break;
    case VPMIN:
      op_encoding = 0xA * B8 | B4;
      break;
    case VPMAX:
      op_encoding = 0xA * B8;
      break;
    default:
      UNREACHABLE();
      break;
  }
  // Split each register code into its 4-bit field and extension bit.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int size = NeonSz(dt);
  int u = NeonU(dt);
  return 0x1E4U * B23 | u * B24 | d * B22 | size * B20 | vn * B16 | vd * B12 |
         n * B7 | m * B5 | vm | op_encoding;
}
   4674 
void Assembler::vpadd(DwVfpRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Dd = vpadd(Dn, Dm) SIMD floating point pairwise ADD.
  // (This is the F32 form, distinct from the integer overload below.)
  // Instruction details available in ARM DDI 0406C.b, A8-982.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  // 0xD * B8 selects the floating-point VPADD opcode.
  emit(0x1E6U * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 |
       m * B5 | vm);
}
   4690 
   4691 void Assembler::vpadd(NeonSize size, DwVfpRegister dst, DwVfpRegister src1,
   4692                       DwVfpRegister src2) {
   4693   DCHECK(IsEnabled(NEON));
   4694   // Dd = vpadd(Dn, Dm) SIMD integer pairwise ADD.
   4695   // Instruction details available in ARM DDI 0406C.b, A8-980.
   4696   emit(EncodeNeonPairwiseOp(VPADD, NeonSizeToDataType(size), dst, src1, src2));
   4697 }
   4698 
   4699 void Assembler::vpmin(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
   4700                       DwVfpRegister src2) {
   4701   DCHECK(IsEnabled(NEON));
   4702   // Dd = vpmin(Dn, Dm) SIMD integer pairwise MIN.
   4703   // Instruction details available in ARM DDI 0406C.b, A8-986.
   4704   emit(EncodeNeonPairwiseOp(VPMIN, dt, dst, src1, src2));
   4705 }
   4706 
   4707 void Assembler::vpmax(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
   4708                       DwVfpRegister src2) {
   4709   DCHECK(IsEnabled(NEON));
   4710   // Dd = vpmax(Dn, Dm) SIMD integer pairwise MAX.
   4711   // Instruction details available in ARM DDI 0406C.b, A8-986.
   4712   emit(EncodeNeonPairwiseOp(VPMAX, dt, dst, src1, src2));
   4713 }
   4714 
   4715 void Assembler::vtst(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
   4716                      QwNeonRegister src2) {
   4717   DCHECK(IsEnabled(NEON));
   4718   // Qd = vtst(Qn, Qm) SIMD test integer operands.
   4719   // Instruction details available in ARM DDI 0406C.b, A8-1098.
   4720   emit(EncodeNeonBinOp(VTST, size, dst, src1, src2));
   4721 }
   4722 
   4723 void Assembler::vceq(QwNeonRegister dst, QwNeonRegister src1,
   4724                      QwNeonRegister src2) {
   4725   DCHECK(IsEnabled(NEON));
   4726   // Qd = vceq(Qn, Qm) SIMD floating point compare equal.
   4727   // Instruction details available in ARM DDI 0406C.b, A8-844.
   4728   emit(EncodeNeonBinOp(VCEQF, dst, src1, src2));
   4729 }
   4730 
   4731 void Assembler::vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
   4732                      QwNeonRegister src2) {
   4733   DCHECK(IsEnabled(NEON));
   4734   // Qd = vceq(Qn, Qm) SIMD integer compare equal.
   4735   // Instruction details available in ARM DDI 0406C.b, A8-844.
   4736   emit(EncodeNeonBinOp(VCEQ, size, dst, src1, src2));
   4737 }
   4738 
   4739 void Assembler::vcge(QwNeonRegister dst, QwNeonRegister src1,
   4740                      QwNeonRegister src2) {
   4741   DCHECK(IsEnabled(NEON));
   4742   // Qd = vcge(Qn, Qm) SIMD floating point compare greater or equal.
   4743   // Instruction details available in ARM DDI 0406C.b, A8-848.
   4744   emit(EncodeNeonBinOp(VCGEF, dst, src1, src2));
   4745 }
   4746 
   4747 void Assembler::vcge(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
   4748                      QwNeonRegister src2) {
   4749   DCHECK(IsEnabled(NEON));
   4750   // Qd = vcge(Qn, Qm) SIMD integer compare greater or equal.
   4751   // Instruction details available in ARM DDI 0406C.b, A8-848.
   4752   emit(EncodeNeonBinOp(VCGE, dt, dst, src1, src2));
   4753 }
   4754 
   4755 void Assembler::vcgt(QwNeonRegister dst, QwNeonRegister src1,
   4756                      QwNeonRegister src2) {
   4757   DCHECK(IsEnabled(NEON));
   4758   // Qd = vcgt(Qn, Qm) SIMD floating point compare greater than.
   4759   // Instruction details available in ARM DDI 0406C.b, A8-852.
   4760   emit(EncodeNeonBinOp(VCGTF, dst, src1, src2));
   4761 }
   4762 
   4763 void Assembler::vcgt(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
   4764                      QwNeonRegister src2) {
   4765   DCHECK(IsEnabled(NEON));
   4766   // Qd = vcgt(Qn, Qm) SIMD integer compare greater than.
   4767   // Instruction details available in ARM DDI 0406C.b, A8-852.
   4768   emit(EncodeNeonBinOp(VCGT, dt, dst, src1, src2));
   4769 }
   4770 
   4771 void Assembler::vext(QwNeonRegister dst, QwNeonRegister src1,
   4772                      QwNeonRegister src2, int bytes) {
   4773   DCHECK(IsEnabled(NEON));
   4774   // Qd = vext(Qn, Qm) SIMD byte extract.
   4775   // Instruction details available in ARM DDI 0406C.b, A8-890.
   4776   int vd, d;
   4777   dst.split_code(&vd, &d);
   4778   int vn, n;
   4779   src1.split_code(&vn, &n);
   4780   int vm, m;
   4781   src2.split_code(&vm, &m);
   4782   DCHECK_GT(16, bytes);
   4783   emit(0x1E5U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | bytes * B8 |
   4784        n * B7 | B6 | m * B5 | vm);
   4785 }
   4786 
// Two-register NEON permute/reverse operations sharing the encoding below.
enum NeonSizedOp { VZIP, VUZP, VREV16, VREV32, VREV64, VTRN };

// Encodes a two-register sized NEON op (zip/unzip/reverse/transpose) for
// either D or Q registers (|reg_type|). The element size goes in B18..B19.
static Instr EncodeNeonSizedOp(NeonSizedOp op, NeonRegType reg_type,
                               NeonSize size, int dst_code, int src_code) {
  // Op-specific opcode bits (B16..B17 group select, B7.. variant select).
  int op_encoding = 0;
  switch (op) {
    case VZIP:
      op_encoding = 0x2 * B16 | 0x3 * B7;
      break;
    case VUZP:
      op_encoding = 0x2 * B16 | 0x2 * B7;
      break;
    case VREV16:
      op_encoding = 0x2 * B7;
      break;
    case VREV32:
      op_encoding = 0x1 * B7;
      break;
    case VREV64:
      // op_encoding is 0;
      break;
    case VTRN:
      op_encoding = 0x2 * B16 | B7;
      break;
    default:
      UNREACHABLE();
      break;
  }
  // NeonSplitCode also folds the D/Q (quadword) bit into op_encoding.
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);

  int sz = static_cast<int>(size);
  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | vd * B12 | m * B5 |
         vm | op_encoding;
}
   4824 
   4825 void Assembler::vzip(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
   4826   if (size == Neon32) {  // vzip.32 Dd, Dm is a pseudo-op for vtrn.32 Dd, Dm.
   4827     vtrn(size, src1, src2);
   4828   } else {
   4829     DCHECK(IsEnabled(NEON));
   4830     // vzip.<size>(Dn, Dm) SIMD zip (interleave).
   4831     // Instruction details available in ARM DDI 0406C.b, A8-1102.
   4832     emit(EncodeNeonSizedOp(VZIP, NEON_D, size, src1.code(), src2.code()));
   4833   }
   4834 }
   4835 
   4836 void Assembler::vzip(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
   4837   DCHECK(IsEnabled(NEON));
   4838   // vzip.<size>(Qn, Qm) SIMD zip (interleave).
   4839   // Instruction details available in ARM DDI 0406C.b, A8-1102.
   4840   emit(EncodeNeonSizedOp(VZIP, NEON_Q, size, src1.code(), src2.code()));
   4841 }
   4842 
   4843 void Assembler::vuzp(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
   4844   if (size == Neon32) {  // vuzp.32 Dd, Dm is a pseudo-op for vtrn.32 Dd, Dm.
   4845     vtrn(size, src1, src2);
   4846   } else {
   4847     DCHECK(IsEnabled(NEON));
   4848     // vuzp.<size>(Dn, Dm) SIMD un-zip (de-interleave).
   4849     // Instruction details available in ARM DDI 0406C.b, A8-1100.
   4850     emit(EncodeNeonSizedOp(VUZP, NEON_D, size, src1.code(), src2.code()));
   4851   }
   4852 }
   4853 
   4854 void Assembler::vuzp(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
   4855   DCHECK(IsEnabled(NEON));
   4856   // vuzp.<size>(Qn, Qm) SIMD un-zip (de-interleave).
   4857   // Instruction details available in ARM DDI 0406C.b, A8-1100.
   4858   emit(EncodeNeonSizedOp(VUZP, NEON_Q, size, src1.code(), src2.code()));
   4859 }
   4860 
   4861 void Assembler::vrev16(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
   4862   DCHECK(IsEnabled(NEON));
   4863   // Qd = vrev16.<size>(Qm) SIMD element reverse.
   4864   // Instruction details available in ARM DDI 0406C.b, A8-1028.
   4865   emit(EncodeNeonSizedOp(VREV16, NEON_Q, size, dst.code(), src.code()));
   4866 }
   4867 
   4868 void Assembler::vrev32(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
   4869   DCHECK(IsEnabled(NEON));
   4870   // Qd = vrev32.<size>(Qm) SIMD element reverse.
   4871   // Instruction details available in ARM DDI 0406C.b, A8-1028.
   4872   emit(EncodeNeonSizedOp(VREV32, NEON_Q, size, dst.code(), src.code()));
   4873 }
   4874 
   4875 void Assembler::vrev64(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
   4876   DCHECK(IsEnabled(NEON));
   4877   // Qd = vrev64.<size>(Qm) SIMD element reverse.
   4878   // Instruction details available in ARM DDI 0406C.b, A8-1028.
   4879   emit(EncodeNeonSizedOp(VREV64, NEON_Q, size, dst.code(), src.code()));
   4880 }
   4881 
   4882 void Assembler::vtrn(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
   4883   DCHECK(IsEnabled(NEON));
   4884   // vtrn.<size>(Dn, Dm) SIMD element transpose.
   4885   // Instruction details available in ARM DDI 0406C.b, A8-1096.
   4886   emit(EncodeNeonSizedOp(VTRN, NEON_D, size, src1.code(), src2.code()));
   4887 }
   4888 
   4889 void Assembler::vtrn(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
   4890   DCHECK(IsEnabled(NEON));
   4891   // vtrn.<size>(Qn, Qm) SIMD element transpose.
   4892   // Instruction details available in ARM DDI 0406C.b, A8-1096.
   4893   emit(EncodeNeonSizedOp(VTRN, NEON_Q, size, src1.code(), src2.code()));
   4894 }
   4895 
// Encode NEON vtbl / vtbx instruction. |list| is the table of one to four
// consecutive D registers; |index| holds the byte indices; |vtbx| selects
// the "extension" variant via the op bit at B6.
static Instr EncodeNeonVTB(DwVfpRegister dst, const NeonListOperand& list,
                           DwVfpRegister index, bool vtbx) {
  // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices.
  // Instruction details available in ARM DDI 0406C.b, A8-1094.
  // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices.
  // Instruction details available in ARM DDI 0406C.b, A8-1094.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  list.base().split_code(&vn, &n);  // Table base register -> Vn/N fields.
  int vm, m;
  index.split_code(&vm, &m);
  int op = vtbx ? 1 : 0;  // vtbl = 0, vtbx = 1.
  // list.length() encodes the table length (number of registers - 1) at B8.
  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 |
         list.length() * B8 | n * B7 | op * B6 | m * B5 | vm;
}
   4913 
   4914 void Assembler::vtbl(DwVfpRegister dst, const NeonListOperand& list,
   4915                      DwVfpRegister index) {
   4916   DCHECK(IsEnabled(NEON));
   4917   emit(EncodeNeonVTB(dst, list, index, false));
   4918 }
   4919 
   4920 void Assembler::vtbx(DwVfpRegister dst, const NeonListOperand& list,
   4921                      DwVfpRegister index) {
   4922   DCHECK(IsEnabled(NEON));
   4923   emit(EncodeNeonVTB(dst, list, index, true));
   4924 }
   4925 
   4926 // Pseudo instructions.
   4927 void Assembler::nop(int type) {
   4928   // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes
   4929   // some of the CPU's pipeline and has to issue. Older ARM chips simply used
   4930   // MOV Rx, Rx as NOP and it performs better even in newer CPUs.
   4931   // We therefore use MOV Rx, Rx, even on newer CPUs, and use Rx to encode
   4932   // a type.
   4933   DCHECK(0 <= type && type <= 14);  // mov pc, pc isn't a nop.
   4934   emit(al | 13*B21 | type*B12 | type);
   4935 }
   4936 
   4937 void Assembler::pop() { add(sp, sp, Operand(kPointerSize)); }
   4938 
   4939 bool Assembler::IsMovT(Instr instr) {
   4940   instr &= ~(((kNumberOfConditions - 1) << 28) |  // Mask off conditions
   4941              ((kNumRegisters-1)*B12) |            // mask out register
   4942              EncodeMovwImmediate(0xFFFF));        // mask out immediate value
   4943   return instr == kMovtPattern;
   4944 }
   4945 
   4946 
   4947 bool Assembler::IsMovW(Instr instr) {
   4948   instr &= ~(((kNumberOfConditions - 1) << 28) |  // Mask off conditions
   4949              ((kNumRegisters-1)*B12) |            // mask out destination
   4950              EncodeMovwImmediate(0xFFFF));        // mask out immediate value
   4951   return instr == kMovwPattern;
   4952 }
   4953 
   4954 
   4955 Instr Assembler::GetMovTPattern() { return kMovtPattern; }
   4956 
   4957 
   4958 Instr Assembler::GetMovWPattern() { return kMovwPattern; }
   4959 
   4960 
   4961 Instr Assembler::EncodeMovwImmediate(uint32_t immediate) {
   4962   DCHECK_LT(immediate, 0x10000);
   4963   return ((immediate & 0xF000) << 4) | (immediate & 0xFFF);
   4964 }
   4965 
   4966 
   4967 Instr Assembler::PatchMovwImmediate(Instr instruction, uint32_t immediate) {
   4968   instruction &= ~EncodeMovwImmediate(0xFFFF);
   4969   return instruction | EncodeMovwImmediate(immediate);
   4970 }
   4971 
   4972 
   4973 int Assembler::DecodeShiftImm(Instr instr) {
   4974   int rotate = Instruction::RotateValue(instr) * 2;
   4975   int immed8 = Instruction::Immed8Value(instr);
   4976   return base::bits::RotateRight32(immed8, rotate);
   4977 }
   4978 
   4979 
// Replaces the addrmode1 immediate operand of |instr| with |immed|, which
// must be representable as a rotated 8-bit value (checked in debug builds).
Instr Assembler::PatchShiftImm(Instr instr, int immed) {
  uint32_t rotate_imm = 0;
  uint32_t immed_8 = 0;
  // FitsShifter computes the rotate/imm8 encoding of |immed|, if any.
  bool immed_fits = FitsShifter(immed, &rotate_imm, &immed_8, nullptr);
  DCHECK(immed_fits);
  USE(immed_fits);  // Silence unused-variable warning in release builds.
  return (instr & ~kOff12Mask) | (rotate_imm << 8) | immed_8;
}
   4988 
   4989 
   4990 bool Assembler::IsNop(Instr instr, int type) {
   4991   DCHECK(0 <= type && type <= 14);  // mov pc, pc isn't a nop.
   4992   // Check for mov rx, rx where x = type.
   4993   return instr == (al | 13*B21 | type*B12 | type);
   4994 }
   4995 
   4996 
// Returns true if |instr| is a "mov rd, #imm" after masking out the
// variable fields.
bool Assembler::IsMovImmed(Instr instr) {
  return (instr & kMovImmedMask) == kMovImmedPattern;
}
   5000 
   5001 
// Returns true if |instr| is an "orr rd, rn, #imm" after masking out the
// variable fields.
bool Assembler::IsOrrImmed(Instr instr) {
  return (instr & kOrrImmedMask) == kOrrImmedPattern;
}
   5005 
   5006 
   5007 // static
   5008 bool Assembler::ImmediateFitsAddrMode1Instruction(int32_t imm32) {
   5009   uint32_t dummy1;
   5010   uint32_t dummy2;
   5011   return FitsShifter(imm32, &dummy1, &dummy2, nullptr);
   5012 }
   5013 
   5014 
   5015 bool Assembler::ImmediateFitsAddrMode2Instruction(int32_t imm32) {
   5016   return is_uint12(abs(imm32));
   5017 }
   5018 
   5019 
   5020 // Debugging.
   5021 void Assembler::RecordConstPool(int size) {
   5022   // We only need this for debugger support, to correctly compute offsets in the
   5023   // code.
   5024   RecordRelocInfo(RelocInfo::CONST_POOL, static_cast<intptr_t>(size));
   5025 }
   5026 
   5027 
// Grows the code buffer, doubling it below 1MB and adding 1MB per step
// above, then moves the instructions (at the start of the buffer) and the
// relocation info (which grows down from the end) into the new buffer.
void Assembler::GrowBuffer() {
  if (!own_buffer_) FATAL("external code buffer is too small");

  // Compute new buffer size.
  CodeDesc desc;  // the new buffer
  if (buffer_size_ < 1 * MB) {
    desc.buffer_size = 2*buffer_size_;
  } else {
    desc.buffer_size = buffer_size_ + 1*MB;
  }

  // Some internal data structures overflow for very large buffers,
  // they must ensure that kMaximalBufferSize is not too large.
  if (desc.buffer_size > kMaximalBufferSize) {
    V8::FatalProcessOutOfMemory(nullptr, "Assembler::GrowBuffer");
  }

  // Set up new buffer.
  desc.buffer = NewArray<byte>(desc.buffer_size);

  desc.instr_size = pc_offset();
  // Reloc info occupies the tail of the buffer, from reloc_info_writer.pos()
  // to the buffer end.
  desc.reloc_size = (buffer_ + buffer_size_) - reloc_info_writer.pos();
  desc.origin = this;

  // Copy the data.
  int pc_delta = desc.buffer - buffer_;
  int rc_delta = (desc.buffer + desc.buffer_size) - (buffer_ + buffer_size_);
  MemMove(desc.buffer, buffer_, desc.instr_size);
  MemMove(reloc_info_writer.pos() + rc_delta, reloc_info_writer.pos(),
          desc.reloc_size);

  // Switch buffers.
  DeleteArray(buffer_);
  buffer_ = desc.buffer;
  buffer_size_ = desc.buffer_size;
  pc_ += pc_delta;
  reloc_info_writer.Reposition(reloc_info_writer.pos() + rc_delta,
                               reloc_info_writer.last_pc() + pc_delta);

  // None of our relocation types are pc relative pointing outside the code
  // buffer nor pc absolute pointing inside the code buffer, so there is no need
  // to relocate any emitted relocation entries.
}
   5071 
   5072 
// Writes one raw byte at the current pc and advances pc.
void Assembler::db(uint8_t data) {
  // db is used to write raw data. The constant pool should be emitted or
  // blocked before using db.
  DCHECK(is_const_pool_blocked() || pending_32_bit_constants_.empty());
  DCHECK(is_const_pool_blocked() || pending_64_bit_constants_.empty());
  CheckBuffer();
  *reinterpret_cast<uint8_t*>(pc_) = data;
  pc_ += sizeof(uint8_t);
}
   5082 
   5083 
// Writes one raw 32-bit word at the current pc and advances pc.
void Assembler::dd(uint32_t data) {
  // dd is used to write raw data. The constant pool should be emitted or
  // blocked before using dd.
  DCHECK(is_const_pool_blocked() || pending_32_bit_constants_.empty());
  DCHECK(is_const_pool_blocked() || pending_64_bit_constants_.empty());
  CheckBuffer();
  // NOTE(review): this store presumes pc_ is suitably aligned (or that the
  // target tolerates unaligned word stores) — confirm against callers.
  *reinterpret_cast<uint32_t*>(pc_) = data;
  pc_ += sizeof(uint32_t);
}
   5093 
   5094 
// Writes one raw 64-bit value at the current pc and advances pc.
void Assembler::dq(uint64_t value) {
  // dq is used to write raw data. The constant pool should be emitted or
  // blocked before using dq.
  DCHECK(is_const_pool_blocked() || pending_32_bit_constants_.empty());
  DCHECK(is_const_pool_blocked() || pending_64_bit_constants_.empty());
  CheckBuffer();
  // NOTE(review): this store presumes pc_ is suitably aligned (or that the
  // target tolerates unaligned doubleword stores) — confirm against callers.
  *reinterpret_cast<uint64_t*>(pc_) = value;
  pc_ += sizeof(uint64_t);
}
   5104 
// Records a relocation entry for the instruction at the current pc,
// unless the mode is filtered out by the assembler options.
void Assembler::RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data) {
  if (options().disable_reloc_info_for_patching) return;
  if (RelocInfo::IsNone(rmode) ||
      // Don't record external references unless the heap will be serialized.
      (RelocInfo::IsOnlyForSerializer(rmode) &&
       !options().record_reloc_info_for_serialization && !emit_debug_code())) {
    return;
  }
  DCHECK_GE(buffer_space(), kMaxRelocSize);  // too late to grow buffer here
  RelocInfo rinfo(reinterpret_cast<Address>(pc_), rmode, data, nullptr);
  reloc_info_writer.Write(&rinfo);
}
   5117 
// Queues a 32-bit constant (loaded by the instruction at |position|) for
// emission in the next constant pool, merging it with an existing pending
// entry of the same value/mode when sharing is allowed.
void Assembler::ConstantPoolAddEntry(int position, RelocInfo::Mode rmode,
                                     intptr_t value) {
  DCHECK(rmode != RelocInfo::COMMENT && rmode != RelocInfo::CONST_POOL);
  // We can share CODE_TARGETs because we don't patch the code objects anymore,
  // and we make sure we emit only one reloc info for them (thus delta patching)
  // will apply the delta only once. At the moment, we do not dedup code targets
  // if they are wrapped in a heap object request (value == 0).
  bool sharing_ok = RelocInfo::IsShareableRelocMode(rmode) ||
                    (rmode == RelocInfo::CODE_TARGET && value != 0);
  DCHECK_LT(pending_32_bit_constants_.size(), kMaxNumPending32Constants);
  if (pending_32_bit_constants_.empty()) {
    // First pending entry: remember where the pool is first referenced so
    // CheckConstPool can track the load-offset range limit.
    first_const_pool_32_use_ = position;
  }
  ConstantPoolEntry entry(position, value, sharing_ok, rmode);

  bool shared = false;
  if (sharing_ok) {
    // Merge the constant, if possible.
    for (size_t i = 0; i < pending_32_bit_constants_.size(); i++) {
      ConstantPoolEntry& current_entry = pending_32_bit_constants_[i];
      if (!current_entry.sharing_ok()) continue;
      if (entry.value() == current_entry.value() &&
          entry.rmode() == current_entry.rmode()) {
        entry.set_merged_index(i);
        shared = true;
        break;
      }
    }
  }

  pending_32_bit_constants_.push_back(entry);

  // Make sure the constant pool is not emitted in place of the next
  // instruction for which we just recorded relocation info.
  BlockConstPoolFor(1);

  // Emit relocation info.
  if (MustOutputRelocInfo(rmode, this) && !shared) {
    RecordRelocInfo(rmode);
  }
}
   5159 
// Prevents constant pool emission for the next |instructions| instructions
// by pushing no_const_pool_before_ (and the next buffer check) forward.
void Assembler::BlockConstPoolFor(int instructions) {
  int pc_limit = pc_offset() + instructions * kInstrSize;
  if (no_const_pool_before_ < pc_limit) {
    // Max pool start (if we need a jump and an alignment).
#ifdef DEBUG
    // Verify that delaying the pool this far cannot push any pending entry
    // out of its pc-relative load range.
    int start = pc_limit + kInstrSize + 2 * kPointerSize;
    DCHECK(pending_32_bit_constants_.empty() ||
           (start - first_const_pool_32_use_ +
                pending_64_bit_constants_.size() * kDoubleSize <
            kMaxDistToIntPool));
    DCHECK(pending_64_bit_constants_.empty() ||
           (start - first_const_pool_64_use_ < kMaxDistToFPPool));
#endif
    no_const_pool_before_ = pc_limit;
  }

  if (next_buffer_check_ < no_const_pool_before_) {
    next_buffer_check_ = no_const_pool_before_;
  }
}
   5180 
   5181 
// Decides whether the pending constant pool must be emitted now and, if so,
// emits it: optional jump over the pool, pool marker, optional alignment
// word, then the 64-bit entries followed by the 32-bit entries, patching
// each recorded vldr/ldr with its final pc-relative offset.
void Assembler::CheckConstPool(bool force_emit, bool require_jump) {
  // Some short sequence of instruction mustn't be broken up by constant pool
  // emission, such sequences are protected by calls to BlockConstPoolFor and
  // BlockConstPoolScope.
  if (is_const_pool_blocked()) {
    // Something is wrong if emission is forced and blocked at the same time.
    DCHECK(!force_emit);
    return;
  }

  // There is nothing to do if there are no pending constant pool entries.
  if (pending_32_bit_constants_.empty() && pending_64_bit_constants_.empty()) {
    // Calculate the offset of the next check.
    next_buffer_check_ = pc_offset() + kCheckPoolInterval;
    return;
  }

  // Check that the code buffer is large enough before emitting the constant
  // pool (include the jump over the pool and the constant pool marker and
  // the gap to the relocation information).
  int jump_instr = require_jump ? kInstrSize : 0;
  int size_up_to_marker = jump_instr + kInstrSize;
  int estimated_size_after_marker =
      pending_32_bit_constants_.size() * kPointerSize;
  bool has_int_values = !pending_32_bit_constants_.empty();
  bool has_fp_values = !pending_64_bit_constants_.empty();
  bool require_64_bit_align = false;
  if (has_fp_values) {
    require_64_bit_align =
        !IsAligned(reinterpret_cast<intptr_t>(pc_ + size_up_to_marker),
                   kDoubleAlignment);
    if (require_64_bit_align) {
      estimated_size_after_marker += kInstrSize;
    }
    estimated_size_after_marker +=
        pending_64_bit_constants_.size() * kDoubleSize;
  }
  int estimated_size = size_up_to_marker + estimated_size_after_marker;

  // We emit a constant pool when:
  //  * requested to do so by parameter force_emit (e.g. after each function).
  //  * the distance from the first instruction accessing the constant pool to
  //    any of the constant pool entries will exceed its limit the next
  //    time the pool is checked. This is overly restrictive, but we don't emit
  //    constant pool entries in-order so it's conservatively correct.
  //  * the instruction doesn't require a jump after itself to jump over the
  //    constant pool, and we're getting close to running out of range.
  if (!force_emit) {
    DCHECK(has_fp_values || has_int_values);
    bool need_emit = false;
    if (has_fp_values) {
      // The 64-bit constants are always emitted before the 32-bit constants, so
      // we can ignore the effect of the 32-bit constants on estimated_size.
      int dist64 = pc_offset() + estimated_size -
                   pending_32_bit_constants_.size() * kPointerSize -
                   first_const_pool_64_use_;
      if ((dist64 >= kMaxDistToFPPool - kCheckPoolInterval) ||
          (!require_jump && (dist64 >= kMaxDistToFPPool / 2))) {
        need_emit = true;
      }
    }
    if (has_int_values) {
      int dist32 = pc_offset() + estimated_size - first_const_pool_32_use_;
      if ((dist32 >= kMaxDistToIntPool - kCheckPoolInterval) ||
          (!require_jump && (dist32 >= kMaxDistToIntPool / 2))) {
        need_emit = true;
      }
    }
    if (!need_emit) return;
  }

  // Deduplicate constants.
  int size_after_marker = estimated_size_after_marker;
  for (size_t i = 0; i < pending_64_bit_constants_.size(); i++) {
    ConstantPoolEntry& entry = pending_64_bit_constants_[i];
    if (entry.is_merged()) size_after_marker -= kDoubleSize;
  }

  for (size_t i = 0; i < pending_32_bit_constants_.size(); i++) {
    ConstantPoolEntry& entry = pending_32_bit_constants_[i];
    if (entry.is_merged()) size_after_marker -= kPointerSize;
  }

  int size = size_up_to_marker + size_after_marker;

  int needed_space = size + kGap;
  while (buffer_space() <= needed_space) GrowBuffer();

  {
    // Block recursive calls to CheckConstPool.
    BlockConstPoolScope block_const_pool(this);
    RecordComment("[ Constant Pool");
    RecordConstPool(size);

    Label size_check;
    bind(&size_check);

    // Emit jump over constant pool if necessary.
    Label after_pool;
    if (require_jump) {
      b(&after_pool);
    }

    // Put down constant pool marker "Undefined instruction".
    // The data size helps disassembly know what to print.
    emit(kConstantPoolMarker |
         EncodeConstantPoolLength(size_after_marker / kPointerSize));

    if (require_64_bit_align) {
      // Pad with an extra marker word so the doubles below are 8-aligned.
      emit(kConstantPoolMarker);
    }

    // Emit 64-bit constant pool entries first: their range is smaller than
    // 32-bit entries.
    for (size_t i = 0; i < pending_64_bit_constants_.size(); i++) {
      ConstantPoolEntry& entry = pending_64_bit_constants_[i];

      Instr instr = instr_at(entry.position());
      // Instruction to patch must be 'vldr rd, [pc, #offset]' with offset == 0.
      DCHECK((IsVldrDPcImmediateOffset(instr) &&
              GetVldrDRegisterImmediateOffset(instr) == 0));

      int delta = pc_offset() - entry.position() - Instruction::kPcLoadDelta;
      DCHECK(is_uint10(delta));

      if (entry.is_merged()) {
        // Point the load at the pool slot emitted for the merged-with entry.
        ConstantPoolEntry& merged =
            pending_64_bit_constants_[entry.merged_index()];
        DCHECK(entry.value64() == merged.value64());
        Instr merged_instr = instr_at(merged.position());
        DCHECK(IsVldrDPcImmediateOffset(merged_instr));
        delta = GetVldrDRegisterImmediateOffset(merged_instr);
        delta += merged.position() - entry.position();
      }
      instr_at_put(entry.position(),
                   SetVldrDRegisterImmediateOffset(instr, delta));
      if (!entry.is_merged()) {
        DCHECK(IsAligned(reinterpret_cast<intptr_t>(pc_), kDoubleAlignment));
        dq(entry.value64());
      }
    }

    // Emit 32-bit constant pool entries.
    for (size_t i = 0; i < pending_32_bit_constants_.size(); i++) {
      ConstantPoolEntry& entry = pending_32_bit_constants_[i];
      Instr instr = instr_at(entry.position());

      // 64-bit loads shouldn't get here.
      DCHECK(!IsVldrDPcImmediateOffset(instr));
      DCHECK(!IsMovW(instr));
      DCHECK(IsLdrPcImmediateOffset(instr) &&
             GetLdrRegisterImmediateOffset(instr) == 0);

      int delta = pc_offset() - entry.position() - Instruction::kPcLoadDelta;
      DCHECK(is_uint12(delta));
      // 0 is the smallest delta:
      //   ldr rd, [pc, #0]
      //   constant pool marker
      //   data

      if (entry.is_merged()) {
        DCHECK(entry.sharing_ok());
        ConstantPoolEntry& merged =
            pending_32_bit_constants_[entry.merged_index()];
        DCHECK(entry.value() == merged.value());
        Instr merged_instr = instr_at(merged.position());
        DCHECK(IsLdrPcImmediateOffset(merged_instr));
        delta = GetLdrRegisterImmediateOffset(merged_instr);
        delta += merged.position() - entry.position();
      }
      instr_at_put(entry.position(),
                   SetLdrRegisterImmediateOffset(instr, delta));
      if (!entry.is_merged()) {
        emit(entry.value());
      }
    }

    pending_32_bit_constants_.clear();
    pending_64_bit_constants_.clear();

    first_const_pool_32_use_ = -1;
    first_const_pool_64_use_ = -1;

    RecordComment("]");

    DCHECK_EQ(size, SizeOfCodeGeneratedSince(&size_check));

    if (after_pool.is_linked()) {
      bind(&after_pool);
    }
  }

  // Since a constant pool was just emitted, move the check offset forward by
  // the standard interval.
  next_buffer_check_ = pc_offset() + kCheckPoolInterval;
}
   5378 
// Constructs an assembler over an existing code region of exactly
// |instructions| instructions, for patching code in place.  The buffer is
// sized so that writing precisely |instructions| instructions fills it up
// to the kGap slack the base Assembler reserves.
PatchingAssembler::PatchingAssembler(const AssemblerOptions& options,
                                     byte* address, int instructions)
    : Assembler(options, address, instructions * kInstrSize + kGap) {
  // No relocation information may be produced while patching; the reloc
  // writer must still sit at the very end of the buffer.
  DCHECK_EQ(reloc_info_writer.pos(), buffer_ + buffer_size_);
}
   5384 
// Verifies on destruction that the patching session completed as intended:
// nothing was deferred and the region was filled exactly.
PatchingAssembler::~PatchingAssembler() {
  // Check that we don't have any pending constant pools.
  DCHECK(pending_32_bit_constants_.empty());
  DCHECK(pending_64_bit_constants_.empty());

  // Check that the code was patched as expected: the pc must have advanced
  // over the whole patch region (buffer minus the reserved kGap slack)...
  DCHECK_EQ(pc_, buffer_ + buffer_size_ - kGap);
  // ...and no relocation information may have been emitted.
  DCHECK_EQ(reloc_info_writer.pos(), buffer_ + buffer_size_);
}
   5394 
   5395 void PatchingAssembler::Emit(Address addr) { emit(static_cast<Instr>(addr)); }
   5396 
// Opens a scope in which scratch registers may be acquired from
// |assembler|.  Snapshots both the core and VFP scratch-register
// availability lists so the destructor can restore them, making scopes
// safely nestable.
UseScratchRegisterScope::UseScratchRegisterScope(Assembler* assembler)
    : assembler_(assembler),
      old_available_(*assembler->GetScratchRegisterList()),
      old_available_vfp_(*assembler->GetScratchVfpRegisterList()) {}
   5401 
   5402 UseScratchRegisterScope::~UseScratchRegisterScope() {
   5403   *assembler_->GetScratchRegisterList() = old_available_;
   5404   *assembler_->GetScratchVfpRegisterList() = old_available_vfp_;
   5405 }
   5406 
   5407 Register UseScratchRegisterScope::Acquire() {
   5408   RegList* available = assembler_->GetScratchRegisterList();
   5409   DCHECK_NOT_NULL(available);
   5410   DCHECK_NE(*available, 0);
   5411   int index = static_cast<int>(base::bits::CountTrailingZeros32(*available));
   5412   Register reg = Register::from_code(index);
   5413   *available &= ~reg.bit();
   5414   return reg;
   5415 }
   5416 
   5417 }  // namespace internal
   5418 }  // namespace v8
   5419 
   5420 #endif  // V8_TARGET_ARCH_ARM
   5421