1 /* libs/pixelflinger/codeflinger/ARMAssembler.cpp 2 ** 3 ** Copyright 2006, The Android Open Source Project 4 ** 5 ** Licensed under the Apache License, Version 2.0 (the "License"); 6 ** you may not use this file except in compliance with the License. 7 ** You may obtain a copy of the License at 8 ** 9 ** http://www.apache.org/licenses/LICENSE-2.0 10 ** 11 ** Unless required by applicable law or agreed to in writing, software 12 ** distributed under the License is distributed on an "AS IS" BASIS, 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 ** See the License for the specific language governing permissions and 15 ** limitations under the License. 16 */ 17 18 #define LOG_TAG "ARMAssembler" 19 20 #include <stdio.h> 21 #include <stdlib.h> 22 23 #include <cutils/properties.h> 24 #include <log/log.h> 25 #include <private/pixelflinger/ggl_context.h> 26 27 #include "ARMAssembler.h" 28 #include "CodeCache.h" 29 #include "disassem.h" 30 31 // ---------------------------------------------------------------------------- 32 33 namespace android { 34 35 // ---------------------------------------------------------------------------- 36 #if 0 37 #pragma mark - 38 #pragma mark ARMAssembler... 39 #endif 40 41 ARMAssembler::ARMAssembler(const sp<Assembly>& assembly) 42 : ARMAssemblerInterface(), 43 mAssembly(assembly) 44 { 45 mBase = mPC = (uint32_t *)assembly->base(); 46 mDuration = ggl_system_time(); 47 } 48 49 ARMAssembler::~ARMAssembler() 50 { 51 } 52 53 uint32_t* ARMAssembler::pc() const 54 { 55 return mPC; 56 } 57 58 uint32_t* ARMAssembler::base() const 59 { 60 return mBase; 61 } 62 63 void ARMAssembler::reset() 64 { 65 mBase = mPC = (uint32_t *)mAssembly->base(); 66 mBranchTargets.clear(); 67 mLabels.clear(); 68 mLabelsInverseMapping.clear(); 69 mComments.clear(); 70 } 71 72 int ARMAssembler::getCodegenArch() 73 { 74 return CODEGEN_ARCH_ARM; 75 } 76 77 // ---------------------------------------------------------------------------- 78 79 void ARMAssembler::disassemble(const char* name) 80 { 81 if (name) { 82 printf("%s:\n", name); 83 } 84 size_t count = pc()-base(); 85 uint32_t* i = base(); 86 while (count--) { 87 ssize_t label = mLabelsInverseMapping.indexOfKey(i); 88 if (label >= 0) { 89 printf("%s:\n", mLabelsInverseMapping.valueAt(label)); 90 } 91 ssize_t comment = mComments.indexOfKey(i); 92 if (comment >= 0) { 93 printf("; %s\n", mComments.valueAt(comment)); 94 } 95 printf("%08x: %08x ", uintptr_t(i), int(i[0])); 96 ::disassemble((uintptr_t)i); 97 i++; 98 } 99 } 100 101 void ARMAssembler::comment(const char* string) 102 { 103 mComments.add(mPC, string); 104 } 105 106 void ARMAssembler::label(const char* theLabel) 107 { 108 mLabels.add(theLabel, mPC); 109 mLabelsInverseMapping.add(mPC, theLabel); 110 } 111 112 void ARMAssembler::B(int cc, const char* label) 113 { 114 mBranchTargets.add(branch_target_t(label, mPC)); 115 *mPC++ = (cc<<28) | (0xA<<24) | 0; 116 } 117 118 void ARMAssembler::BL(int cc, const char* label) 119 { 120 mBranchTargets.add(branch_target_t(label, mPC)); 121 *mPC++ = (cc<<28) | (0xB<<24) | 0; 122 } 123 124 #if 0 125 #pragma mark - 126 #pragma mark Prolog/Epilog & Generate... 127 #endif 128 129 130 void ARMAssembler::prolog() 131 { 132 // write dummy prolog code 133 mPrologPC = mPC; 134 STM(AL, FD, SP, 1, LSAVED); 135 } 136 137 void ARMAssembler::epilog(uint32_t touched) 138 { 139 touched &= LSAVED; 140 if (touched) { 141 // write prolog code 142 uint32_t* pc = mPC; 143 mPC = mPrologPC; 144 STM(AL, FD, SP, 1, touched | LLR); 145 mPC = pc; 146 // write epilog code 147 LDM(AL, FD, SP, 1, touched | LLR); 148 BX(AL, LR); 149 } else { // heh, no registers to save! 150 // write prolog code 151 uint32_t* pc = mPC; 152 mPC = mPrologPC; 153 MOV(AL, 0, R0, R0); // NOP 154 mPC = pc; 155 // write epilog code 156 BX(AL, LR); 157 } 158 } 159 160 int ARMAssembler::generate(const char* name) 161 { 162 // fixup all the branches 163 size_t count = mBranchTargets.size(); 164 while (count--) { 165 const branch_target_t& bt = mBranchTargets[count]; 166 uint32_t* target_pc = mLabels.valueFor(bt.label); 167 LOG_ALWAYS_FATAL_IF(!target_pc, 168 "error resolving branch targets, target_pc is null"); 169 int32_t offset = int32_t(target_pc - (bt.pc+2)); 170 *bt.pc |= offset & 0xFFFFFF; 171 } 172 173 mAssembly->resize( int(pc()-base())*4 ); 174 175 // the instruction cache is flushed by CodeCache 176 const int64_t duration = ggl_system_time() - mDuration; 177 const char * const format = "generated %s (%d ins) at [%p:%p] in %lld ns\n"; 178 ALOGI(format, name, int(pc()-base()), base(), pc(), duration); 179 180 char value[PROPERTY_VALUE_MAX]; 181 property_get("debug.pf.disasm", value, "0"); 182 if (atoi(value) != 0) { 183 printf(format, name, int(pc()-base()), base(), pc(), duration); 184 disassemble(name); 185 } 186 187 return NO_ERROR; 188 } 189 190 uint32_t* ARMAssembler::pcForLabel(const char* label) 191 { 192 return mLabels.valueFor(label); 193 } 194 195 // ---------------------------------------------------------------------------- 196 197 #if 0 198 #pragma mark - 199 #pragma mark Data Processing... 200 #endif 201 202 void ARMAssembler::dataProcessing(int opcode, int cc, 203 int s, int Rd, int Rn, uint32_t Op2) 204 { 205 *mPC++ = (cc<<28) | (opcode<<21) | (s<<20) | (Rn<<16) | (Rd<<12) | Op2; 206 } 207 208 #if 0 209 #pragma mark - 210 #pragma mark Multiply... 211 #endif 212 213 // multiply... 214 void ARMAssembler::MLA(int cc, int s, 215 int Rd, int Rm, int Rs, int Rn) { 216 if (Rd == Rm) { int t = Rm; Rm=Rs; Rs=t; } 217 LOG_FATAL_IF(Rd==Rm, "MLA(r%u,r%u,r%u,r%u)", Rd,Rm,Rs,Rn); 218 *mPC++ = (cc<<28) | (1<<21) | (s<<20) | 219 (Rd<<16) | (Rn<<12) | (Rs<<8) | 0x90 | Rm; 220 } 221 void ARMAssembler::MUL(int cc, int s, 222 int Rd, int Rm, int Rs) { 223 if (Rd == Rm) { int t = Rm; Rm=Rs; Rs=t; } 224 LOG_FATAL_IF(Rd==Rm, "MUL(r%u,r%u,r%u)", Rd,Rm,Rs); 225 *mPC++ = (cc<<28) | (s<<20) | (Rd<<16) | (Rs<<8) | 0x90 | Rm; 226 } 227 void ARMAssembler::UMULL(int cc, int s, 228 int RdLo, int RdHi, int Rm, int Rs) { 229 LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi, 230 "UMULL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs); 231 *mPC++ = (cc<<28) | (1<<23) | (s<<20) | 232 (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm; 233 } 234 void ARMAssembler::UMUAL(int cc, int s, 235 int RdLo, int RdHi, int Rm, int Rs) { 236 LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi, 237 "UMUAL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs); 238 *mPC++ = (cc<<28) | (1<<23) | (1<<21) | (s<<20) | 239 (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm; 240 } 241 void ARMAssembler::SMULL(int cc, int s, 242 int RdLo, int RdHi, int Rm, int Rs) { 243 LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi, 244 "SMULL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs); 245 *mPC++ = (cc<<28) | (1<<23) | (1<<22) | (s<<20) | 246 (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm; 247 } 248 void ARMAssembler::SMUAL(int cc, int s, 249 int RdLo, int RdHi, int Rm, int Rs) { 250 LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi, 251 "SMUAL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs); 252 *mPC++ = (cc<<28) | (1<<23) | (1<<22) | (1<<21) | (s<<20) | 253 (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm; 254 } 255 256 #if 0 257 #pragma mark - 258 #pragma mark Branches... 259 #endif 260 261 // branches... 262 void ARMAssembler::B(int cc, uint32_t* pc) 263 { 264 int32_t offset = int32_t(pc - (mPC+2)); 265 *mPC++ = (cc<<28) | (0xA<<24) | (offset & 0xFFFFFF); 266 } 267 268 void ARMAssembler::BL(int cc, uint32_t* pc) 269 { 270 int32_t offset = int32_t(pc - (mPC+2)); 271 *mPC++ = (cc<<28) | (0xB<<24) | (offset & 0xFFFFFF); 272 } 273 274 void ARMAssembler::BX(int cc, int Rn) 275 { 276 *mPC++ = (cc<<28) | 0x12FFF10 | Rn; 277 } 278 279 #if 0 280 #pragma mark - 281 #pragma mark Data Transfer... 282 #endif 283 284 // data transfert... 285 void ARMAssembler::LDR(int cc, int Rd, int Rn, uint32_t offset) { 286 *mPC++ = (cc<<28) | (1<<26) | (1<<20) | (Rn<<16) | (Rd<<12) | offset; 287 } 288 void ARMAssembler::LDRB(int cc, int Rd, int Rn, uint32_t offset) { 289 *mPC++ = (cc<<28) | (1<<26) | (1<<22) | (1<<20) | (Rn<<16) | (Rd<<12) | offset; 290 } 291 void ARMAssembler::STR(int cc, int Rd, int Rn, uint32_t offset) { 292 *mPC++ = (cc<<28) | (1<<26) | (Rn<<16) | (Rd<<12) | offset; 293 } 294 void ARMAssembler::STRB(int cc, int Rd, int Rn, uint32_t offset) { 295 *mPC++ = (cc<<28) | (1<<26) | (1<<22) | (Rn<<16) | (Rd<<12) | offset; 296 } 297 298 void ARMAssembler::LDRH(int cc, int Rd, int Rn, uint32_t offset) { 299 *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xB0 | offset; 300 } 301 void ARMAssembler::LDRSB(int cc, int Rd, int Rn, uint32_t offset) { 302 *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xD0 | offset; 303 } 304 void ARMAssembler::LDRSH(int cc, int Rd, int Rn, uint32_t offset) { 305 *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xF0 | offset; 306 } 307 void ARMAssembler::STRH(int cc, int Rd, int Rn, uint32_t offset) { 308 *mPC++ = (cc<<28) | (Rn<<16) | (Rd<<12) | 0xB0 | offset; 309 } 310 311 #if 0 312 #pragma mark - 313 #pragma mark Block Data Transfer... 314 #endif 315 316 // block data transfer... 317 void ARMAssembler::LDM(int cc, int dir, 318 int Rn, int W, uint32_t reg_list) 319 { // ED FD EA FA IB IA DB DA 320 const uint8_t P[8] = { 1, 0, 1, 0, 1, 0, 1, 0 }; 321 const uint8_t U[8] = { 1, 1, 0, 0, 1, 1, 0, 0 }; 322 *mPC++ = (cc<<28) | (4<<25) | (uint32_t(P[dir])<<24) | 323 (uint32_t(U[dir])<<23) | (1<<20) | (W<<21) | (Rn<<16) | reg_list; 324 } 325 326 void ARMAssembler::STM(int cc, int dir, 327 int Rn, int W, uint32_t reg_list) 328 { // ED FD EA FA IB IA DB DA 329 const uint8_t P[8] = { 0, 1, 0, 1, 1, 0, 1, 0 }; 330 const uint8_t U[8] = { 0, 0, 1, 1, 1, 1, 0, 0 }; 331 *mPC++ = (cc<<28) | (4<<25) | (uint32_t(P[dir])<<24) | 332 (uint32_t(U[dir])<<23) | (0<<20) | (W<<21) | (Rn<<16) | reg_list; 333 } 334 335 #if 0 336 #pragma mark - 337 #pragma mark Special... 338 #endif 339 340 // special... 341 void ARMAssembler::SWP(int cc, int Rn, int Rd, int Rm) { 342 *mPC++ = (cc<<28) | (2<<23) | (Rn<<16) | (Rd << 12) | 0x90 | Rm; 343 } 344 void ARMAssembler::SWPB(int cc, int Rn, int Rd, int Rm) { 345 *mPC++ = (cc<<28) | (2<<23) | (1<<22) | (Rn<<16) | (Rd << 12) | 0x90 | Rm; 346 } 347 void ARMAssembler::SWI(int cc, uint32_t comment) { 348 *mPC++ = (cc<<28) | (0xF<<24) | comment; 349 } 350 351 #if 0 352 #pragma mark - 353 #pragma mark DSP instructions... 354 #endif 355 356 // DSP instructions... 357 void ARMAssembler::PLD(int Rn, uint32_t offset) { 358 LOG_ALWAYS_FATAL_IF(!((offset&(1<<24)) && !(offset&(1<<21))), 359 "PLD only P=1, W=0"); 360 *mPC++ = 0xF550F000 | (Rn<<16) | offset; 361 } 362 363 void ARMAssembler::CLZ(int cc, int Rd, int Rm) 364 { 365 *mPC++ = (cc<<28) | 0x16F0F10| (Rd<<12) | Rm; 366 } 367 368 void ARMAssembler::QADD(int cc, int Rd, int Rm, int Rn) 369 { 370 *mPC++ = (cc<<28) | 0x1000050 | (Rn<<16) | (Rd<<12) | Rm; 371 } 372 373 void ARMAssembler::QDADD(int cc, int Rd, int Rm, int Rn) 374 { 375 *mPC++ = (cc<<28) | 0x1400050 | (Rn<<16) | (Rd<<12) | Rm; 376 } 377 378 void ARMAssembler::QSUB(int cc, int Rd, int Rm, int Rn) 379 { 380 *mPC++ = (cc<<28) | 0x1200050 | (Rn<<16) | (Rd<<12) | Rm; 381 } 382 383 void ARMAssembler::QDSUB(int cc, int Rd, int Rm, int Rn) 384 { 385 *mPC++ = (cc<<28) | 0x1600050 | (Rn<<16) | (Rd<<12) | Rm; 386 } 387 388 void ARMAssembler::SMUL(int cc, int xy, 389 int Rd, int Rm, int Rs) 390 { 391 *mPC++ = (cc<<28) | 0x1600080 | (Rd<<16) | (Rs<<8) | (xy<<4) | Rm; 392 } 393 394 void ARMAssembler::SMULW(int cc, int y, 395 int Rd, int Rm, int Rs) 396 { 397 *mPC++ = (cc<<28) | 0x12000A0 | (Rd<<16) | (Rs<<8) | (y<<4) | Rm; 398 } 399 400 void ARMAssembler::SMLA(int cc, int xy, 401 int Rd, int Rm, int Rs, int Rn) 402 { 403 *mPC++ = (cc<<28) | 0x1000080 | (Rd<<16) | (Rn<<12) | (Rs<<8) | (xy<<4) | Rm; 404 } 405 406 void ARMAssembler::SMLAL(int cc, int xy, 407 int RdHi, int RdLo, int Rs, int Rm) 408 { 409 *mPC++ = (cc<<28) | 0x1400080 | (RdHi<<16) | (RdLo<<12) | (Rs<<8) | (xy<<4) | Rm; 410 } 411 412 void ARMAssembler::SMLAW(int cc, int y, 413 int Rd, int Rm, int Rs, int Rn) 414 { 415 *mPC++ = (cc<<28) | 0x1200080 | (Rd<<16) | (Rn<<12) | (Rs<<8) | (y<<4) | Rm; 416 } 417 418 #if 0 419 #pragma mark - 420 #pragma mark Byte/half word extract and extend (ARMv6+ only)... 421 #endif 422 423 void ARMAssembler::UXTB16(int cc, int Rd, int Rm, int rotate) 424 { 425 *mPC++ = (cc<<28) | 0x6CF0070 | (Rd<<12) | ((rotate >> 3) << 10) | Rm; 426 } 427 #if 0 428 #pragma mark - 429 #pragma mark Bit manipulation (ARMv7+ only)... 430 #endif 431 432 // Bit manipulation (ARMv7+ only)... 433 void ARMAssembler::UBFX(int cc, int Rd, int Rn, int lsb, int width) 434 { 435 *mPC++ = (cc<<28) | 0x7E00000 | ((width-1)<<16) | (Rd<<12) | (lsb<<7) | 0x50 | Rn; 436 } 437 438 #if 0 439 #pragma mark - 440 #pragma mark Addressing modes... 441 #endif 442 443 int ARMAssembler::buildImmediate( 444 uint32_t immediate, uint32_t& rot, uint32_t& imm) 445 { 446 rot = 0; 447 imm = immediate; 448 if (imm > 0x7F) { // skip the easy cases 449 while (!(imm&3) || (imm&0xFC000000)) { 450 uint32_t newval; 451 newval = imm >> 2; 452 newval |= (imm&3) << 30; 453 imm = newval; 454 rot += 2; 455 if (rot == 32) { 456 rot = 0; 457 break; 458 } 459 } 460 } 461 rot = (16 - (rot>>1)) & 0xF; 462 463 if (imm>=0x100) 464 return -EINVAL; 465 466 if (((imm>>(rot<<1)) | (imm<<(32-(rot<<1)))) != immediate) 467 return -1; 468 469 return 0; 470 } 471 472 // shifters... 473 474 bool ARMAssembler::isValidImmediate(uint32_t immediate) 475 { 476 uint32_t rot, imm; 477 return buildImmediate(immediate, rot, imm) == 0; 478 } 479 480 uint32_t ARMAssembler::imm(uint32_t immediate) 481 { 482 uint32_t rot, imm; 483 int err = buildImmediate(immediate, rot, imm); 484 485 LOG_ALWAYS_FATAL_IF(err==-EINVAL, 486 "immediate %08x cannot be encoded", 487 immediate); 488 489 LOG_ALWAYS_FATAL_IF(err, 490 "immediate (%08x) encoding bogus!", 491 immediate); 492 493 return (1<<25) | (rot<<8) | imm; 494 } 495 496 uint32_t ARMAssembler::reg_imm(int Rm, int type, uint32_t shift) 497 { 498 return ((shift&0x1F)<<7) | ((type&0x3)<<5) | (Rm&0xF); 499 } 500 501 uint32_t ARMAssembler::reg_rrx(int Rm) 502 { 503 return (ROR<<5) | (Rm&0xF); 504 } 505 506 uint32_t ARMAssembler::reg_reg(int Rm, int type, int Rs) 507 { 508 return ((Rs&0xF)<<8) | ((type&0x3)<<5) | (1<<4) | (Rm&0xF); 509 } 510 511 // addressing modes... 512 // LDR(B)/STR(B)/PLD (immediate and Rm can be negative, which indicate U=0) 513 uint32_t ARMAssembler::immed12_pre(int32_t immed12, int W) 514 { 515 LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800, 516 "LDR(B)/STR(B)/PLD immediate too big (%08x)", 517 immed12); 518 return (1<<24) | (((uint32_t(immed12)>>31)^1)<<23) | 519 ((W&1)<<21) | (abs(immed12)&0x7FF); 520 } 521 522 uint32_t ARMAssembler::immed12_post(int32_t immed12) 523 { 524 LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800, 525 "LDR(B)/STR(B)/PLD immediate too big (%08x)", 526 immed12); 527 528 return (((uint32_t(immed12)>>31)^1)<<23) | (abs(immed12)&0x7FF); 529 } 530 531 uint32_t ARMAssembler::reg_scale_pre(int Rm, int type, 532 uint32_t shift, int W) 533 { 534 return (1<<25) | (1<<24) | 535 (((uint32_t(Rm)>>31)^1)<<23) | ((W&1)<<21) | 536 reg_imm(abs(Rm), type, shift); 537 } 538 539 uint32_t ARMAssembler::reg_scale_post(int Rm, int type, uint32_t shift) 540 { 541 return (1<<25) | (((uint32_t(Rm)>>31)^1)<<23) | reg_imm(abs(Rm), type, shift); 542 } 543 544 // LDRH/LDRSB/LDRSH/STRH (immediate and Rm can be negative, which indicate U=0) 545 uint32_t ARMAssembler::immed8_pre(int32_t immed8, int W) 546 { 547 uint32_t offset = abs(immed8); 548 549 LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100, 550 "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)", 551 immed8); 552 553 return (1<<24) | (1<<22) | (((uint32_t(immed8)>>31)^1)<<23) | 554 ((W&1)<<21) | (((offset&0xF0)<<4)|(offset&0xF)); 555 } 556 557 uint32_t ARMAssembler::immed8_post(int32_t immed8) 558 { 559 uint32_t offset = abs(immed8); 560 561 LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100, 562 "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)", 563 immed8); 564 565 return (1<<22) | (((uint32_t(immed8)>>31)^1)<<23) | 566 (((offset&0xF0)<<4) | (offset&0xF)); 567 } 568 569 uint32_t ARMAssembler::reg_pre(int Rm, int W) 570 { 571 return (1<<24) | (((uint32_t(Rm)>>31)^1)<<23) | ((W&1)<<21) | (abs(Rm)&0xF); 572 } 573 574 uint32_t ARMAssembler::reg_post(int Rm) 575 { 576 return (((uint32_t(Rm)>>31)^1)<<23) | (abs(Rm)&0xF); 577 } 578 579 }; // namespace android 580 581