1 /* libs/pixelflinger/codeflinger/ARMAssembler.cpp 2 ** 3 ** Copyright 2006, The Android Open Source Project 4 ** 5 ** Licensed under the Apache License, Version 2.0 (the "License"); 6 ** you may not use this file except in compliance with the License. 7 ** You may obtain a copy of the License at 8 ** 9 ** http://www.apache.org/licenses/LICENSE-2.0 10 ** 11 ** Unless required by applicable law or agreed to in writing, software 12 ** distributed under the License is distributed on an "AS IS" BASIS, 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 ** See the License for the specific language governing permissions and 15 ** limitations under the License. 16 */ 17 18 #define LOG_TAG "ARMAssembler" 19 20 #include <stdio.h> 21 #include <stdlib.h> 22 #include <cutils/log.h> 23 #include <cutils/properties.h> 24 25 #if defined(WITH_LIB_HARDWARE) 26 #include <hardware_legacy/qemu_tracing.h> 27 #endif 28 29 #include <private/pixelflinger/ggl_context.h> 30 31 #include "codeflinger/ARMAssembler.h" 32 #include "codeflinger/CodeCache.h" 33 #include "codeflinger/disassem.h" 34 35 // ---------------------------------------------------------------------------- 36 37 namespace android { 38 39 // ---------------------------------------------------------------------------- 40 #if 0 41 #pragma mark - 42 #pragma mark ARMAssembler... 43 #endif 44 45 ARMAssembler::ARMAssembler(const sp<Assembly>& assembly) 46 : ARMAssemblerInterface(), 47 mAssembly(assembly) 48 { 49 mBase = mPC = (uint32_t *)assembly->base(); 50 mDuration = ggl_system_time(); 51 #if defined(WITH_LIB_HARDWARE) 52 mQemuTracing = true; 53 #endif 54 } 55 56 ARMAssembler::~ARMAssembler() 57 { 58 } 59 60 uint32_t* ARMAssembler::pc() const 61 { 62 return mPC; 63 } 64 65 uint32_t* ARMAssembler::base() const 66 { 67 return mBase; 68 } 69 70 void ARMAssembler::reset() 71 { 72 mBase = mPC = (uint32_t *)mAssembly->base(); 73 mBranchTargets.clear(); 74 mLabels.clear(); 75 mLabelsInverseMapping.clear(); 76 mComments.clear(); 77 } 78 79 // ---------------------------------------------------------------------------- 80 81 void ARMAssembler::disassemble(const char* name) 82 { 83 if (name) { 84 printf("%s:\n", name); 85 } 86 size_t count = pc()-base(); 87 uint32_t* i = base(); 88 while (count--) { 89 ssize_t label = mLabelsInverseMapping.indexOfKey(i); 90 if (label >= 0) { 91 printf("%s:\n", mLabelsInverseMapping.valueAt(label)); 92 } 93 ssize_t comment = mComments.indexOfKey(i); 94 if (comment >= 0) { 95 printf("; %s\n", mComments.valueAt(comment)); 96 } 97 printf("%08x: %08x ", int(i), int(i[0])); 98 ::disassemble((u_int)i); 99 i++; 100 } 101 } 102 103 void ARMAssembler::comment(const char* string) 104 { 105 mComments.add(mPC, string); 106 } 107 108 void ARMAssembler::label(const char* theLabel) 109 { 110 mLabels.add(theLabel, mPC); 111 mLabelsInverseMapping.add(mPC, theLabel); 112 } 113 114 void ARMAssembler::B(int cc, const char* label) 115 { 116 mBranchTargets.add(branch_target_t(label, mPC)); 117 *mPC++ = (cc<<28) | (0xA<<24) | 0; 118 } 119 120 void ARMAssembler::BL(int cc, const char* label) 121 { 122 mBranchTargets.add(branch_target_t(label, mPC)); 123 *mPC++ = (cc<<28) | (0xB<<24) | 0; 124 } 125 126 #if 0 127 #pragma mark - 128 #pragma mark Prolog/Epilog & Generate... 129 #endif 130 131 132 void ARMAssembler::prolog() 133 { 134 // write dummy prolog code 135 mPrologPC = mPC; 136 STM(AL, FD, SP, 1, LSAVED); 137 } 138 139 void ARMAssembler::epilog(uint32_t touched) 140 { 141 touched &= LSAVED; 142 if (touched) { 143 // write prolog code 144 uint32_t* pc = mPC; 145 mPC = mPrologPC; 146 STM(AL, FD, SP, 1, touched | LLR); 147 mPC = pc; 148 // write epilog code 149 LDM(AL, FD, SP, 1, touched | LLR); 150 BX(AL, LR); 151 } else { // heh, no registers to save! 152 // write prolog code 153 uint32_t* pc = mPC; 154 mPC = mPrologPC; 155 MOV(AL, 0, R0, R0); // NOP 156 mPC = pc; 157 // write epilog code 158 BX(AL, LR); 159 } 160 } 161 162 int ARMAssembler::generate(const char* name) 163 { 164 // fixup all the branches 165 size_t count = mBranchTargets.size(); 166 while (count--) { 167 const branch_target_t& bt = mBranchTargets[count]; 168 uint32_t* target_pc = mLabels.valueFor(bt.label); 169 LOG_ALWAYS_FATAL_IF(!target_pc, 170 "error resolving branch targets, target_pc is null"); 171 int32_t offset = int32_t(target_pc - (bt.pc+2)); 172 *bt.pc |= offset & 0xFFFFFF; 173 } 174 175 mAssembly->resize( int(pc()-base())*4 ); 176 177 // the instruction cache is flushed by CodeCache 178 const int64_t duration = ggl_system_time() - mDuration; 179 const char * const format = "generated %s (%d ins) at [%p:%p] in %lld ns\n"; 180 LOGI(format, name, int(pc()-base()), base(), pc(), duration); 181 182 #if defined(WITH_LIB_HARDWARE) 183 if (__builtin_expect(mQemuTracing, 0)) { 184 int err = qemu_add_mapping(int(base()), name); 185 mQemuTracing = (err >= 0); 186 } 187 #endif 188 189 char value[PROPERTY_VALUE_MAX]; 190 property_get("debug.pf.disasm", value, "0"); 191 if (atoi(value) != 0) { 192 printf(format, name, int(pc()-base()), base(), pc(), duration); 193 disassemble(name); 194 } 195 196 return NO_ERROR; 197 } 198 199 uint32_t* ARMAssembler::pcForLabel(const char* label) 200 { 201 return mLabels.valueFor(label); 202 } 203 204 // ---------------------------------------------------------------------------- 205 206 #if 0 207 #pragma mark - 208 #pragma mark Data Processing... 209 #endif 210 211 void ARMAssembler::dataProcessing(int opcode, int cc, 212 int s, int Rd, int Rn, uint32_t Op2) 213 { 214 *mPC++ = (cc<<28) | (opcode<<21) | (s<<20) | (Rn<<16) | (Rd<<12) | Op2; 215 } 216 217 #if 0 218 #pragma mark - 219 #pragma mark Multiply... 220 #endif 221 222 // multiply... 223 void ARMAssembler::MLA(int cc, int s, 224 int Rd, int Rm, int Rs, int Rn) { 225 if (Rd == Rm) { int t = Rm; Rm=Rs; Rs=t; } 226 LOG_FATAL_IF(Rd==Rm, "MLA(r%u,r%u,r%u,r%u)", Rd,Rm,Rs,Rn); 227 *mPC++ = (cc<<28) | (1<<21) | (s<<20) | 228 (Rd<<16) | (Rn<<12) | (Rs<<8) | 0x90 | Rm; 229 } 230 void ARMAssembler::MUL(int cc, int s, 231 int Rd, int Rm, int Rs) { 232 if (Rd == Rm) { int t = Rm; Rm=Rs; Rs=t; } 233 LOG_FATAL_IF(Rd==Rm, "MUL(r%u,r%u,r%u)", Rd,Rm,Rs); 234 *mPC++ = (cc<<28) | (s<<20) | (Rd<<16) | (Rs<<8) | 0x90 | Rm; 235 } 236 void ARMAssembler::UMULL(int cc, int s, 237 int RdLo, int RdHi, int Rm, int Rs) { 238 LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi, 239 "UMULL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs); 240 *mPC++ = (cc<<28) | (1<<23) | (s<<20) | 241 (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm; 242 } 243 void ARMAssembler::UMUAL(int cc, int s, 244 int RdLo, int RdHi, int Rm, int Rs) { 245 LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi, 246 "UMUAL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs); 247 *mPC++ = (cc<<28) | (1<<23) | (1<<21) | (s<<20) | 248 (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm; 249 } 250 void ARMAssembler::SMULL(int cc, int s, 251 int RdLo, int RdHi, int Rm, int Rs) { 252 LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi, 253 "SMULL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs); 254 *mPC++ = (cc<<28) | (1<<23) | (1<<22) | (s<<20) | 255 (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm; 256 } 257 void ARMAssembler::SMUAL(int cc, int s, 258 int RdLo, int RdHi, int Rm, int Rs) { 259 LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi, 260 "SMUAL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs); 261 *mPC++ = (cc<<28) | (1<<23) | (1<<22) | (1<<21) | (s<<20) | 262 (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm; 263 } 264 265 #if 0 266 #pragma mark - 267 #pragma mark Branches... 268 #endif 269 270 // branches... 271 void ARMAssembler::B(int cc, uint32_t* pc) 272 { 273 int32_t offset = int32_t(pc - (mPC+2)); 274 *mPC++ = (cc<<28) | (0xA<<24) | (offset & 0xFFFFFF); 275 } 276 277 void ARMAssembler::BL(int cc, uint32_t* pc) 278 { 279 int32_t offset = int32_t(pc - (mPC+2)); 280 *mPC++ = (cc<<28) | (0xB<<24) | (offset & 0xFFFFFF); 281 } 282 283 void ARMAssembler::BX(int cc, int Rn) 284 { 285 *mPC++ = (cc<<28) | 0x12FFF10 | Rn; 286 } 287 288 #if 0 289 #pragma mark - 290 #pragma mark Data Transfer... 291 #endif 292 293 // data transfert... 294 void ARMAssembler::LDR(int cc, int Rd, int Rn, uint32_t offset) { 295 *mPC++ = (cc<<28) | (1<<26) | (1<<20) | (Rn<<16) | (Rd<<12) | offset; 296 } 297 void ARMAssembler::LDRB(int cc, int Rd, int Rn, uint32_t offset) { 298 *mPC++ = (cc<<28) | (1<<26) | (1<<22) | (1<<20) | (Rn<<16) | (Rd<<12) | offset; 299 } 300 void ARMAssembler::STR(int cc, int Rd, int Rn, uint32_t offset) { 301 *mPC++ = (cc<<28) | (1<<26) | (Rn<<16) | (Rd<<12) | offset; 302 } 303 void ARMAssembler::STRB(int cc, int Rd, int Rn, uint32_t offset) { 304 *mPC++ = (cc<<28) | (1<<26) | (1<<22) | (Rn<<16) | (Rd<<12) | offset; 305 } 306 307 void ARMAssembler::LDRH(int cc, int Rd, int Rn, uint32_t offset) { 308 *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xB0 | offset; 309 } 310 void ARMAssembler::LDRSB(int cc, int Rd, int Rn, uint32_t offset) { 311 *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xD0 | offset; 312 } 313 void ARMAssembler::LDRSH(int cc, int Rd, int Rn, uint32_t offset) { 314 *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xF0 | offset; 315 } 316 void ARMAssembler::STRH(int cc, int Rd, int Rn, uint32_t offset) { 317 *mPC++ = (cc<<28) | (Rn<<16) | (Rd<<12) | 0xB0 | offset; 318 } 319 320 #if 0 321 #pragma mark - 322 #pragma mark Block Data Transfer... 323 #endif 324 325 // block data transfer... 326 void ARMAssembler::LDM(int cc, int dir, 327 int Rn, int W, uint32_t reg_list) 328 { // ED FD EA FA IB IA DB DA 329 const uint8_t P[8] = { 1, 0, 1, 0, 1, 0, 1, 0 }; 330 const uint8_t U[8] = { 1, 1, 0, 0, 1, 1, 0, 0 }; 331 *mPC++ = (cc<<28) | (4<<25) | (uint32_t(P[dir])<<24) | 332 (uint32_t(U[dir])<<23) | (1<<20) | (W<<21) | (Rn<<16) | reg_list; 333 } 334 335 void ARMAssembler::STM(int cc, int dir, 336 int Rn, int W, uint32_t reg_list) 337 { // ED FD EA FA IB IA DB DA 338 const uint8_t P[8] = { 0, 1, 0, 1, 1, 0, 1, 0 }; 339 const uint8_t U[8] = { 0, 0, 1, 1, 1, 1, 0, 0 }; 340 *mPC++ = (cc<<28) | (4<<25) | (uint32_t(P[dir])<<24) | 341 (uint32_t(U[dir])<<23) | (0<<20) | (W<<21) | (Rn<<16) | reg_list; 342 } 343 344 #if 0 345 #pragma mark - 346 #pragma mark Special... 347 #endif 348 349 // special... 350 void ARMAssembler::SWP(int cc, int Rn, int Rd, int Rm) { 351 *mPC++ = (cc<<28) | (2<<23) | (Rn<<16) | (Rd << 12) | 0x90 | Rm; 352 } 353 void ARMAssembler::SWPB(int cc, int Rn, int Rd, int Rm) { 354 *mPC++ = (cc<<28) | (2<<23) | (1<<22) | (Rn<<16) | (Rd << 12) | 0x90 | Rm; 355 } 356 void ARMAssembler::SWI(int cc, uint32_t comment) { 357 *mPC++ = (cc<<28) | (0xF<<24) | comment; 358 } 359 360 #if 0 361 #pragma mark - 362 #pragma mark DSP instructions... 363 #endif 364 365 // DSP instructions... 366 void ARMAssembler::PLD(int Rn, uint32_t offset) { 367 LOG_ALWAYS_FATAL_IF(!((offset&(1<<24)) && !(offset&(1<<21))), 368 "PLD only P=1, W=0"); 369 *mPC++ = 0xF550F000 | (Rn<<16) | offset; 370 } 371 372 void ARMAssembler::CLZ(int cc, int Rd, int Rm) 373 { 374 *mPC++ = (cc<<28) | 0x16F0F10| (Rd<<12) | Rm; 375 } 376 377 void ARMAssembler::QADD(int cc, int Rd, int Rm, int Rn) 378 { 379 *mPC++ = (cc<<28) | 0x1000050 | (Rn<<16) | (Rd<<12) | Rm; 380 } 381 382 void ARMAssembler::QDADD(int cc, int Rd, int Rm, int Rn) 383 { 384 *mPC++ = (cc<<28) | 0x1400050 | (Rn<<16) | (Rd<<12) | Rm; 385 } 386 387 void ARMAssembler::QSUB(int cc, int Rd, int Rm, int Rn) 388 { 389 *mPC++ = (cc<<28) | 0x1200050 | (Rn<<16) | (Rd<<12) | Rm; 390 } 391 392 void ARMAssembler::QDSUB(int cc, int Rd, int Rm, int Rn) 393 { 394 *mPC++ = (cc<<28) | 0x1600050 | (Rn<<16) | (Rd<<12) | Rm; 395 } 396 397 void ARMAssembler::SMUL(int cc, int xy, 398 int Rd, int Rm, int Rs) 399 { 400 *mPC++ = (cc<<28) | 0x1600080 | (Rd<<16) | (Rs<<8) | (xy<<4) | Rm; 401 } 402 403 void ARMAssembler::SMULW(int cc, int y, 404 int Rd, int Rm, int Rs) 405 { 406 *mPC++ = (cc<<28) | 0x12000A0 | (Rd<<16) | (Rs<<8) | (y<<4) | Rm; 407 } 408 409 void ARMAssembler::SMLA(int cc, int xy, 410 int Rd, int Rm, int Rs, int Rn) 411 { 412 *mPC++ = (cc<<28) | 0x1000080 | (Rd<<16) | (Rn<<12) | (Rs<<8) | (xy<<4) | Rm; 413 } 414 415 void ARMAssembler::SMLAL(int cc, int xy, 416 int RdHi, int RdLo, int Rs, int Rm) 417 { 418 *mPC++ = (cc<<28) | 0x1400080 | (RdHi<<16) | (RdLo<<12) | (Rs<<8) | (xy<<4) | Rm; 419 } 420 421 void ARMAssembler::SMLAW(int cc, int y, 422 int Rd, int Rm, int Rs, int Rn) 423 { 424 *mPC++ = (cc<<28) | 0x1200080 | (Rd<<16) | (Rn<<12) | (Rs<<8) | (y<<4) | Rm; 425 } 426 427 #if 0 428 #pragma mark - 429 #pragma mark Byte/half word extract and extend (ARMv6+ only)... 430 #endif 431 432 void ARMAssembler::UXTB16(int cc, int Rd, int Rm, int rotate) 433 { 434 *mPC++ = (cc<<28) | 0x6CF0070 | (Rd<<12) | ((rotate >> 3) << 10) | Rm; 435 } 436 #if 0 437 #pragma mark - 438 #pragma mark Bit manipulation (ARMv7+ only)... 439 #endif 440 441 // Bit manipulation (ARMv7+ only)... 442 void ARMAssembler::UBFX(int cc, int Rd, int Rn, int lsb, int width) 443 { 444 *mPC++ = (cc<<28) | 0x7E00000 | ((width-1)<<16) | (Rd<<12) | (lsb<<7) | 0x50 | Rn; 445 } 446 447 }; // namespace android 448 449