1 /* libs/pixelflinger/codeflinger/ARMAssembler.cpp 2 ** 3 ** Copyright 2006, The Android Open Source Project 4 ** 5 ** Licensed under the Apache License, Version 2.0 (the "License"); 6 ** you may not use this file except in compliance with the License. 7 ** You may obtain a copy of the License at 8 ** 9 ** http://www.apache.org/licenses/LICENSE-2.0 10 ** 11 ** Unless required by applicable law or agreed to in writing, software 12 ** distributed under the License is distributed on an "AS IS" BASIS, 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 ** See the License for the specific language governing permissions and 15 ** limitations under the License. 16 */ 17 18 #define LOG_TAG "ARMAssembler" 19 20 #include <stdio.h> 21 #include <stdlib.h> 22 #include <cutils/log.h> 23 #include <cutils/properties.h> 24 25 #if defined(WITH_LIB_HARDWARE) 26 #include <hardware_legacy/qemu_tracing.h> 27 #endif 28 29 #include <private/pixelflinger/ggl_context.h> 30 31 #include "ARMAssembler.h" 32 #include "CodeCache.h" 33 #include "disassem.h" 34 35 // ---------------------------------------------------------------------------- 36 37 namespace android { 38 39 // ---------------------------------------------------------------------------- 40 #if 0 41 #pragma mark - 42 #pragma mark ARMAssembler... 43 #endif 44 45 ARMAssembler::ARMAssembler(const sp<Assembly>& assembly) 46 : ARMAssemblerInterface(), 47 mAssembly(assembly) 48 { 49 mBase = mPC = (uint32_t *)assembly->base(); 50 mDuration = ggl_system_time(); 51 #if defined(WITH_LIB_HARDWARE) 52 mQemuTracing = true; 53 #endif 54 } 55 56 ARMAssembler::~ARMAssembler() 57 { 58 } 59 60 uint32_t* ARMAssembler::pc() const 61 { 62 return mPC; 63 } 64 65 uint32_t* ARMAssembler::base() const 66 { 67 return mBase; 68 } 69 70 void ARMAssembler::reset() 71 { 72 mBase = mPC = (uint32_t *)mAssembly->base(); 73 mBranchTargets.clear(); 74 mLabels.clear(); 75 mLabelsInverseMapping.clear(); 76 mComments.clear(); 77 } 78 79 int ARMAssembler::getCodegenArch() 80 { 81 return CODEGEN_ARCH_ARM; 82 } 83 84 // ---------------------------------------------------------------------------- 85 86 void ARMAssembler::disassemble(const char* name) 87 { 88 if (name) { 89 printf("%s:\n", name); 90 } 91 size_t count = pc()-base(); 92 uint32_t* i = base(); 93 while (count--) { 94 ssize_t label = mLabelsInverseMapping.indexOfKey(i); 95 if (label >= 0) { 96 printf("%s:\n", mLabelsInverseMapping.valueAt(label)); 97 } 98 ssize_t comment = mComments.indexOfKey(i); 99 if (comment >= 0) { 100 printf("; %s\n", mComments.valueAt(comment)); 101 } 102 printf("%08x: %08x ", uintptr_t(i), int(i[0])); 103 ::disassemble((uintptr_t)i); 104 i++; 105 } 106 } 107 108 void ARMAssembler::comment(const char* string) 109 { 110 mComments.add(mPC, string); 111 } 112 113 void ARMAssembler::label(const char* theLabel) 114 { 115 mLabels.add(theLabel, mPC); 116 mLabelsInverseMapping.add(mPC, theLabel); 117 } 118 119 void ARMAssembler::B(int cc, const char* label) 120 { 121 mBranchTargets.add(branch_target_t(label, mPC)); 122 *mPC++ = (cc<<28) | (0xA<<24) | 0; 123 } 124 125 void ARMAssembler::BL(int cc, const char* label) 126 { 127 mBranchTargets.add(branch_target_t(label, mPC)); 128 *mPC++ = (cc<<28) | (0xB<<24) | 0; 129 } 130 131 #if 0 132 #pragma mark - 133 #pragma mark Prolog/Epilog & Generate... 134 #endif 135 136 137 void ARMAssembler::prolog() 138 { 139 // write dummy prolog code 140 mPrologPC = mPC; 141 STM(AL, FD, SP, 1, LSAVED); 142 } 143 144 void ARMAssembler::epilog(uint32_t touched) 145 { 146 touched &= LSAVED; 147 if (touched) { 148 // write prolog code 149 uint32_t* pc = mPC; 150 mPC = mPrologPC; 151 STM(AL, FD, SP, 1, touched | LLR); 152 mPC = pc; 153 // write epilog code 154 LDM(AL, FD, SP, 1, touched | LLR); 155 BX(AL, LR); 156 } else { // heh, no registers to save! 157 // write prolog code 158 uint32_t* pc = mPC; 159 mPC = mPrologPC; 160 MOV(AL, 0, R0, R0); // NOP 161 mPC = pc; 162 // write epilog code 163 BX(AL, LR); 164 } 165 } 166 167 int ARMAssembler::generate(const char* name) 168 { 169 // fixup all the branches 170 size_t count = mBranchTargets.size(); 171 while (count--) { 172 const branch_target_t& bt = mBranchTargets[count]; 173 uint32_t* target_pc = mLabels.valueFor(bt.label); 174 LOG_ALWAYS_FATAL_IF(!target_pc, 175 "error resolving branch targets, target_pc is null"); 176 int32_t offset = int32_t(target_pc - (bt.pc+2)); 177 *bt.pc |= offset & 0xFFFFFF; 178 } 179 180 mAssembly->resize( int(pc()-base())*4 ); 181 182 // the instruction cache is flushed by CodeCache 183 const int64_t duration = ggl_system_time() - mDuration; 184 const char * const format = "generated %s (%d ins) at [%p:%p] in %lld ns\n"; 185 ALOGI(format, name, int(pc()-base()), base(), pc(), duration); 186 187 #if defined(WITH_LIB_HARDWARE) 188 if (__builtin_expect(mQemuTracing, 0)) { 189 int err = qemu_add_mapping(uintptr_t(base()), name); 190 mQemuTracing = (err >= 0); 191 } 192 #endif 193 194 char value[PROPERTY_VALUE_MAX]; 195 property_get("debug.pf.disasm", value, "0"); 196 if (atoi(value) != 0) { 197 printf(format, name, int(pc()-base()), base(), pc(), duration); 198 disassemble(name); 199 } 200 201 return NO_ERROR; 202 } 203 204 uint32_t* ARMAssembler::pcForLabel(const char* label) 205 { 206 return mLabels.valueFor(label); 207 } 208 209 // ---------------------------------------------------------------------------- 210 211 #if 0 212 #pragma mark - 213 #pragma mark Data Processing... 214 #endif 215 216 void ARMAssembler::dataProcessing(int opcode, int cc, 217 int s, int Rd, int Rn, uint32_t Op2) 218 { 219 *mPC++ = (cc<<28) | (opcode<<21) | (s<<20) | (Rn<<16) | (Rd<<12) | Op2; 220 } 221 222 #if 0 223 #pragma mark - 224 #pragma mark Multiply... 225 #endif 226 227 // multiply... 228 void ARMAssembler::MLA(int cc, int s, 229 int Rd, int Rm, int Rs, int Rn) { 230 if (Rd == Rm) { int t = Rm; Rm=Rs; Rs=t; } 231 LOG_FATAL_IF(Rd==Rm, "MLA(r%u,r%u,r%u,r%u)", Rd,Rm,Rs,Rn); 232 *mPC++ = (cc<<28) | (1<<21) | (s<<20) | 233 (Rd<<16) | (Rn<<12) | (Rs<<8) | 0x90 | Rm; 234 } 235 void ARMAssembler::MUL(int cc, int s, 236 int Rd, int Rm, int Rs) { 237 if (Rd == Rm) { int t = Rm; Rm=Rs; Rs=t; } 238 LOG_FATAL_IF(Rd==Rm, "MUL(r%u,r%u,r%u)", Rd,Rm,Rs); 239 *mPC++ = (cc<<28) | (s<<20) | (Rd<<16) | (Rs<<8) | 0x90 | Rm; 240 } 241 void ARMAssembler::UMULL(int cc, int s, 242 int RdLo, int RdHi, int Rm, int Rs) { 243 LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi, 244 "UMULL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs); 245 *mPC++ = (cc<<28) | (1<<23) | (s<<20) | 246 (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm; 247 } 248 void ARMAssembler::UMUAL(int cc, int s, 249 int RdLo, int RdHi, int Rm, int Rs) { 250 LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi, 251 "UMUAL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs); 252 *mPC++ = (cc<<28) | (1<<23) | (1<<21) | (s<<20) | 253 (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm; 254 } 255 void ARMAssembler::SMULL(int cc, int s, 256 int RdLo, int RdHi, int Rm, int Rs) { 257 LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi, 258 "SMULL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs); 259 *mPC++ = (cc<<28) | (1<<23) | (1<<22) | (s<<20) | 260 (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm; 261 } 262 void ARMAssembler::SMUAL(int cc, int s, 263 int RdLo, int RdHi, int Rm, int Rs) { 264 LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi, 265 "SMUAL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs); 266 *mPC++ = (cc<<28) | (1<<23) | (1<<22) | (1<<21) | (s<<20) | 267 (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm; 268 } 269 270 #if 0 271 #pragma mark - 272 #pragma mark Branches... 273 #endif 274 275 // branches... 276 void ARMAssembler::B(int cc, uint32_t* pc) 277 { 278 int32_t offset = int32_t(pc - (mPC+2)); 279 *mPC++ = (cc<<28) | (0xA<<24) | (offset & 0xFFFFFF); 280 } 281 282 void ARMAssembler::BL(int cc, uint32_t* pc) 283 { 284 int32_t offset = int32_t(pc - (mPC+2)); 285 *mPC++ = (cc<<28) | (0xB<<24) | (offset & 0xFFFFFF); 286 } 287 288 void ARMAssembler::BX(int cc, int Rn) 289 { 290 *mPC++ = (cc<<28) | 0x12FFF10 | Rn; 291 } 292 293 #if 0 294 #pragma mark - 295 #pragma mark Data Transfer... 296 #endif 297 298 // data transfert... 299 void ARMAssembler::LDR(int cc, int Rd, int Rn, uint32_t offset) { 300 *mPC++ = (cc<<28) | (1<<26) | (1<<20) | (Rn<<16) | (Rd<<12) | offset; 301 } 302 void ARMAssembler::LDRB(int cc, int Rd, int Rn, uint32_t offset) { 303 *mPC++ = (cc<<28) | (1<<26) | (1<<22) | (1<<20) | (Rn<<16) | (Rd<<12) | offset; 304 } 305 void ARMAssembler::STR(int cc, int Rd, int Rn, uint32_t offset) { 306 *mPC++ = (cc<<28) | (1<<26) | (Rn<<16) | (Rd<<12) | offset; 307 } 308 void ARMAssembler::STRB(int cc, int Rd, int Rn, uint32_t offset) { 309 *mPC++ = (cc<<28) | (1<<26) | (1<<22) | (Rn<<16) | (Rd<<12) | offset; 310 } 311 312 void ARMAssembler::LDRH(int cc, int Rd, int Rn, uint32_t offset) { 313 *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xB0 | offset; 314 } 315 void ARMAssembler::LDRSB(int cc, int Rd, int Rn, uint32_t offset) { 316 *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xD0 | offset; 317 } 318 void ARMAssembler::LDRSH(int cc, int Rd, int Rn, uint32_t offset) { 319 *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xF0 | offset; 320 } 321 void ARMAssembler::STRH(int cc, int Rd, int Rn, uint32_t offset) { 322 *mPC++ = (cc<<28) | (Rn<<16) | (Rd<<12) | 0xB0 | offset; 323 } 324 325 #if 0 326 #pragma mark - 327 #pragma mark Block Data Transfer... 328 #endif 329 330 // block data transfer... 331 void ARMAssembler::LDM(int cc, int dir, 332 int Rn, int W, uint32_t reg_list) 333 { // ED FD EA FA IB IA DB DA 334 const uint8_t P[8] = { 1, 0, 1, 0, 1, 0, 1, 0 }; 335 const uint8_t U[8] = { 1, 1, 0, 0, 1, 1, 0, 0 }; 336 *mPC++ = (cc<<28) | (4<<25) | (uint32_t(P[dir])<<24) | 337 (uint32_t(U[dir])<<23) | (1<<20) | (W<<21) | (Rn<<16) | reg_list; 338 } 339 340 void ARMAssembler::STM(int cc, int dir, 341 int Rn, int W, uint32_t reg_list) 342 { // ED FD EA FA IB IA DB DA 343 const uint8_t P[8] = { 0, 1, 0, 1, 1, 0, 1, 0 }; 344 const uint8_t U[8] = { 0, 0, 1, 1, 1, 1, 0, 0 }; 345 *mPC++ = (cc<<28) | (4<<25) | (uint32_t(P[dir])<<24) | 346 (uint32_t(U[dir])<<23) | (0<<20) | (W<<21) | (Rn<<16) | reg_list; 347 } 348 349 #if 0 350 #pragma mark - 351 #pragma mark Special... 352 #endif 353 354 // special... 355 void ARMAssembler::SWP(int cc, int Rn, int Rd, int Rm) { 356 *mPC++ = (cc<<28) | (2<<23) | (Rn<<16) | (Rd << 12) | 0x90 | Rm; 357 } 358 void ARMAssembler::SWPB(int cc, int Rn, int Rd, int Rm) { 359 *mPC++ = (cc<<28) | (2<<23) | (1<<22) | (Rn<<16) | (Rd << 12) | 0x90 | Rm; 360 } 361 void ARMAssembler::SWI(int cc, uint32_t comment) { 362 *mPC++ = (cc<<28) | (0xF<<24) | comment; 363 } 364 365 #if 0 366 #pragma mark - 367 #pragma mark DSP instructions... 368 #endif 369 370 // DSP instructions... 371 void ARMAssembler::PLD(int Rn, uint32_t offset) { 372 LOG_ALWAYS_FATAL_IF(!((offset&(1<<24)) && !(offset&(1<<21))), 373 "PLD only P=1, W=0"); 374 *mPC++ = 0xF550F000 | (Rn<<16) | offset; 375 } 376 377 void ARMAssembler::CLZ(int cc, int Rd, int Rm) 378 { 379 *mPC++ = (cc<<28) | 0x16F0F10| (Rd<<12) | Rm; 380 } 381 382 void ARMAssembler::QADD(int cc, int Rd, int Rm, int Rn) 383 { 384 *mPC++ = (cc<<28) | 0x1000050 | (Rn<<16) | (Rd<<12) | Rm; 385 } 386 387 void ARMAssembler::QDADD(int cc, int Rd, int Rm, int Rn) 388 { 389 *mPC++ = (cc<<28) | 0x1400050 | (Rn<<16) | (Rd<<12) | Rm; 390 } 391 392 void ARMAssembler::QSUB(int cc, int Rd, int Rm, int Rn) 393 { 394 *mPC++ = (cc<<28) | 0x1200050 | (Rn<<16) | (Rd<<12) | Rm; 395 } 396 397 void ARMAssembler::QDSUB(int cc, int Rd, int Rm, int Rn) 398 { 399 *mPC++ = (cc<<28) | 0x1600050 | (Rn<<16) | (Rd<<12) | Rm; 400 } 401 402 void ARMAssembler::SMUL(int cc, int xy, 403 int Rd, int Rm, int Rs) 404 { 405 *mPC++ = (cc<<28) | 0x1600080 | (Rd<<16) | (Rs<<8) | (xy<<4) | Rm; 406 } 407 408 void ARMAssembler::SMULW(int cc, int y, 409 int Rd, int Rm, int Rs) 410 { 411 *mPC++ = (cc<<28) | 0x12000A0 | (Rd<<16) | (Rs<<8) | (y<<4) | Rm; 412 } 413 414 void ARMAssembler::SMLA(int cc, int xy, 415 int Rd, int Rm, int Rs, int Rn) 416 { 417 *mPC++ = (cc<<28) | 0x1000080 | (Rd<<16) | (Rn<<12) | (Rs<<8) | (xy<<4) | Rm; 418 } 419 420 void ARMAssembler::SMLAL(int cc, int xy, 421 int RdHi, int RdLo, int Rs, int Rm) 422 { 423 *mPC++ = (cc<<28) | 0x1400080 | (RdHi<<16) | (RdLo<<12) | (Rs<<8) | (xy<<4) | Rm; 424 } 425 426 void ARMAssembler::SMLAW(int cc, int y, 427 int Rd, int Rm, int Rs, int Rn) 428 { 429 *mPC++ = (cc<<28) | 0x1200080 | (Rd<<16) | (Rn<<12) | (Rs<<8) | (y<<4) | Rm; 430 } 431 432 #if 0 433 #pragma mark - 434 #pragma mark Byte/half word extract and extend (ARMv6+ only)... 435 #endif 436 437 void ARMAssembler::UXTB16(int cc, int Rd, int Rm, int rotate) 438 { 439 *mPC++ = (cc<<28) | 0x6CF0070 | (Rd<<12) | ((rotate >> 3) << 10) | Rm; 440 } 441 #if 0 442 #pragma mark - 443 #pragma mark Bit manipulation (ARMv7+ only)... 444 #endif 445 446 // Bit manipulation (ARMv7+ only)... 447 void ARMAssembler::UBFX(int cc, int Rd, int Rn, int lsb, int width) 448 { 449 *mPC++ = (cc<<28) | 0x7E00000 | ((width-1)<<16) | (Rd<<12) | (lsb<<7) | 0x50 | Rn; 450 } 451 452 #if 0 453 #pragma mark - 454 #pragma mark Addressing modes... 455 #endif 456 457 int ARMAssembler::buildImmediate( 458 uint32_t immediate, uint32_t& rot, uint32_t& imm) 459 { 460 rot = 0; 461 imm = immediate; 462 if (imm > 0x7F) { // skip the easy cases 463 while (!(imm&3) || (imm&0xFC000000)) { 464 uint32_t newval; 465 newval = imm >> 2; 466 newval |= (imm&3) << 30; 467 imm = newval; 468 rot += 2; 469 if (rot == 32) { 470 rot = 0; 471 break; 472 } 473 } 474 } 475 rot = (16 - (rot>>1)) & 0xF; 476 477 if (imm>=0x100) 478 return -EINVAL; 479 480 if (((imm>>(rot<<1)) | (imm<<(32-(rot<<1)))) != immediate) 481 return -1; 482 483 return 0; 484 } 485 486 // shifters... 487 488 bool ARMAssembler::isValidImmediate(uint32_t immediate) 489 { 490 uint32_t rot, imm; 491 return buildImmediate(immediate, rot, imm) == 0; 492 } 493 494 uint32_t ARMAssembler::imm(uint32_t immediate) 495 { 496 uint32_t rot, imm; 497 int err = buildImmediate(immediate, rot, imm); 498 499 LOG_ALWAYS_FATAL_IF(err==-EINVAL, 500 "immediate %08x cannot be encoded", 501 immediate); 502 503 LOG_ALWAYS_FATAL_IF(err, 504 "immediate (%08x) encoding bogus!", 505 immediate); 506 507 return (1<<25) | (rot<<8) | imm; 508 } 509 510 uint32_t ARMAssembler::reg_imm(int Rm, int type, uint32_t shift) 511 { 512 return ((shift&0x1F)<<7) | ((type&0x3)<<5) | (Rm&0xF); 513 } 514 515 uint32_t ARMAssembler::reg_rrx(int Rm) 516 { 517 return (ROR<<5) | (Rm&0xF); 518 } 519 520 uint32_t ARMAssembler::reg_reg(int Rm, int type, int Rs) 521 { 522 return ((Rs&0xF)<<8) | ((type&0x3)<<5) | (1<<4) | (Rm&0xF); 523 } 524 525 // addressing modes... 526 // LDR(B)/STR(B)/PLD (immediate and Rm can be negative, which indicate U=0) 527 uint32_t ARMAssembler::immed12_pre(int32_t immed12, int W) 528 { 529 LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800, 530 "LDR(B)/STR(B)/PLD immediate too big (%08x)", 531 immed12); 532 return (1<<24) | (((uint32_t(immed12)>>31)^1)<<23) | 533 ((W&1)<<21) | (abs(immed12)&0x7FF); 534 } 535 536 uint32_t ARMAssembler::immed12_post(int32_t immed12) 537 { 538 LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800, 539 "LDR(B)/STR(B)/PLD immediate too big (%08x)", 540 immed12); 541 542 return (((uint32_t(immed12)>>31)^1)<<23) | (abs(immed12)&0x7FF); 543 } 544 545 uint32_t ARMAssembler::reg_scale_pre(int Rm, int type, 546 uint32_t shift, int W) 547 { 548 return (1<<25) | (1<<24) | 549 (((uint32_t(Rm)>>31)^1)<<23) | ((W&1)<<21) | 550 reg_imm(abs(Rm), type, shift); 551 } 552 553 uint32_t ARMAssembler::reg_scale_post(int Rm, int type, uint32_t shift) 554 { 555 return (1<<25) | (((uint32_t(Rm)>>31)^1)<<23) | reg_imm(abs(Rm), type, shift); 556 } 557 558 // LDRH/LDRSB/LDRSH/STRH (immediate and Rm can be negative, which indicate U=0) 559 uint32_t ARMAssembler::immed8_pre(int32_t immed8, int W) 560 { 561 uint32_t offset = abs(immed8); 562 563 LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100, 564 "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)", 565 immed8); 566 567 return (1<<24) | (1<<22) | (((uint32_t(immed8)>>31)^1)<<23) | 568 ((W&1)<<21) | (((offset&0xF0)<<4)|(offset&0xF)); 569 } 570 571 uint32_t ARMAssembler::immed8_post(int32_t immed8) 572 { 573 uint32_t offset = abs(immed8); 574 575 LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100, 576 "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)", 577 immed8); 578 579 return (1<<22) | (((uint32_t(immed8)>>31)^1)<<23) | 580 (((offset&0xF0)<<4) | (offset&0xF)); 581 } 582 583 uint32_t ARMAssembler::reg_pre(int Rm, int W) 584 { 585 return (1<<24) | (((uint32_t(Rm)>>31)^1)<<23) | ((W&1)<<21) | (abs(Rm)&0xF); 586 } 587 588 uint32_t ARMAssembler::reg_post(int Rm) 589 { 590 return (((uint32_t(Rm)>>31)^1)<<23) | (abs(Rm)&0xF); 591 } 592 593 }; // namespace android 594 595