1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 /** 18 * @author Alexander V. Astapchuk 19 */ 20 21 /** 22 * @file 23 * @brief Main decoding (disassembling) routines implementation. 24 */ 25 26 #include "dec_base.h" 27 #include "enc_prvt.h" 28 #include <stdio.h> 29 //#include "open/common.h" 30 31 bool DecoderBase::is_prefix(const unsigned char * bytes) 32 { 33 unsigned char b0 = *bytes; 34 unsigned char b1 = *(bytes+1); 35 if (b0 == 0xF0) { // LOCK 36 return true; 37 } 38 if (b0==0xF2 || b0==0xF3) { // REPNZ/REPZ prefixes 39 if (b1 == 0x0F) { // .... but may be a part of SIMD opcode 40 return false; 41 } 42 return true; 43 } 44 if (b0 == 0x2E || b0 == 0x36 || b0==0x3E || b0==0x26 || b0==0x64 || b0==0x3E) { 45 // branch hints, segment prefixes 46 return true; 47 } 48 if (b0==0x66) { // operand-size prefix 49 if (b1 == 0x0F) { // .... but may be a part of SIMD opcode 50 return false; 51 } 52 return false; //XXX - currently considered as part of opcode//true; 53 } 54 if (b0==0x67) { // address size prefix 55 return true; 56 } 57 return false; 58 } 59 60 // Returns prefix count from 0 to 4, or ((unsigned int)-1) on error 61 unsigned int DecoderBase::fill_prefs(const unsigned char * bytes, Inst * pinst) 62 { 63 const unsigned char * my_bytes = bytes; 64 65 while( 1 ) 66 { 67 unsigned char by1 = *my_bytes; 68 unsigned char by2 = *(my_bytes + 1); 69 Inst::PrefGroups where; 70 71 switch( by1 ) 72 { 73 case InstPrefix_REPNE: 74 case InstPrefix_REP: 75 { 76 if( 0x0F == by2) 77 { 78 return pinst->prefc; 79 } 80 } 81 case InstPrefix_LOCK: 82 { 83 where = Inst::Group1; 84 break; 85 } 86 case InstPrefix_CS: 87 case InstPrefix_SS: 88 case InstPrefix_DS: 89 case InstPrefix_ES: 90 case InstPrefix_FS: 91 case InstPrefix_GS: 92 // case InstPrefix_HintTaken: the same as CS override 93 // case InstPrefix_HintNotTaken: the same as DS override 94 { 95 where = Inst::Group2; 96 break; 97 } 98 case InstPrefix_OpndSize: 99 { 100 //NOTE: prefix does not work for JMP Sz16, the opcode is 0x66 0xe9 101 // here 0x66 will be treated as prefix, try_mn will try to match the code starting at 0xe9 102 // it will match JMP Sz32 ... 103 //HACK: assume it is the last prefix, return any way 104 if( 0x0F == by2) 105 { 106 return pinst->prefc; 107 } 108 return pinst->prefc; 109 where = Inst::Group3; 110 break; 111 } 112 case InstPrefix_AddrSize: 113 { 114 where = Inst::Group4; 115 break; 116 } 117 default: 118 { 119 return pinst->prefc; 120 } 121 } 122 // Assertions are not allowed here. 123 // Error situations should result in returning error status 124 if (InstPrefix_Null != pinst->pref[where]) //only one prefix in each group 125 return (unsigned int)-1; 126 127 pinst->pref[where] = (InstPrefix)by1; 128 129 if (pinst->prefc >= 4) //no more than 4 prefixes 130 return (unsigned int)-1; 131 132 pinst->prefc++; 133 ++my_bytes; 134 } 135 } 136 137 138 139 unsigned DecoderBase::decode(const void * addr, Inst * pinst) 140 { 141 Inst tmp; 142 143 //assert( *(unsigned char*)addr != 0x66); 144 145 const unsigned char * bytes = (unsigned char*)addr; 146 147 // Load up to 4 prefixes 148 // for each Mnemonic 149 unsigned int pref_count = fill_prefs(bytes, &tmp); 150 151 if (pref_count == (unsigned int)-1) // Wrong prefix sequence, or >4 prefixes 152 return 0; // Error 153 154 bytes += pref_count; 155 156 // for each opcodedesc 157 // if (raw_len == 0) memcmp(, raw_len) 158 // else check the mixed state which is one of the following: 159 // /digit /i /rw /rd /rb 160 161 bool found = false; 162 const unsigned char * saveBytes = bytes; 163 for (unsigned mn=1; mn<Mnemonic_Count; mn++) { 164 bytes = saveBytes; 165 found=try_mn((Mnemonic)mn, &bytes, &tmp); 166 if (found) { 167 tmp.mn = (Mnemonic)mn; 168 break; 169 } 170 } 171 if (!found) { 172 // Unknown opcode 173 return 0; 174 } 175 tmp.size = (unsigned)(bytes-(const unsigned char*)addr); 176 if (pinst) { 177 *pinst = tmp; 178 } 179 return tmp.size; 180 } 181 182 #ifdef _EM64T_ 183 #define EXTEND_REG(reg, flag) \ 184 ((NULL == rex || 0 == rex->flag) ? reg : (reg + 8)) 185 #else 186 #define EXTEND_REG(reg, flag) (reg) 187 #endif 188 189 //don't know the use of rex, seems not used when _EM64T_ is not enabled 190 bool DecoderBase::decode_aux(const EncoderBase::OpcodeDesc& odesc, unsigned aux, 191 const unsigned char ** pbuf, Inst * pinst 192 #ifdef _EM64T_ 193 , const Rex UNREF *rex 194 #endif 195 ) 196 { 197 OpcodeByteKind kind = (OpcodeByteKind)(aux & OpcodeByteKind_KindMask); 198 unsigned byte = (aux & OpcodeByteKind_OpcodeMask); 199 unsigned data_byte = **pbuf; 200 EncoderBase::Operand& opnd = pinst->operands[pinst->argc]; 201 const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc]; 202 203 switch (kind) { 204 case OpcodeByteKind_SlashR: 205 { 206 RegName reg; 207 OpndKind okind; 208 const ModRM& modrm = *(ModRM*)*pbuf; 209 if (opndDesc.kind & OpndKind_Mem) { // 1st operand is memory 210 #ifdef _EM64T_ 211 decodeModRM(odesc, pbuf, pinst, rex); 212 #else 213 decodeModRM(odesc, pbuf, pinst); 214 #endif 215 ++pinst->argc; 216 const EncoderBase::OpndDesc& opndDesc2 = odesc.opnds[pinst->argc]; 217 okind = ((opndDesc2.kind & OpndKind_XMMReg) || opndDesc2.size==OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg; 218 EncoderBase::Operand& regOpnd = pinst->operands[pinst->argc]; 219 reg = getRegName(okind, opndDesc2.size, EXTEND_REG(modrm.reg, r)); 220 regOpnd = EncoderBase::Operand(reg); 221 } else { // 2nd operand is memory 222 okind = ((opndDesc.kind & OpndKind_XMMReg) || opndDesc.size==OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg; 223 EncoderBase::Operand& regOpnd = pinst->operands[pinst->argc]; 224 reg = getRegName(okind, opndDesc.size, EXTEND_REG(modrm.reg, r)); 225 regOpnd = EncoderBase::Operand(reg); 226 ++pinst->argc; 227 #ifdef _EM64T_ 228 decodeModRM(odesc, pbuf, pinst, rex); 229 #else 230 decodeModRM(odesc, pbuf, pinst); 231 #endif 232 } 233 ++pinst->argc; 234 } 235 return true; 236 case OpcodeByteKind_rb: 237 case OpcodeByteKind_rw: 238 case OpcodeByteKind_rd: 239 { 240 // Gregory - 241 // Here we don't parse register because for current needs 242 // disassembler doesn't require to parse all operands 243 unsigned regid = data_byte - byte; 244 if (regid>7) { 245 return false; 246 } 247 OpndSize opnd_size; 248 switch(kind) 249 { 250 case OpcodeByteKind_rb: 251 { 252 opnd_size = OpndSize_8; 253 break; 254 } 255 case OpcodeByteKind_rw: 256 { 257 opnd_size = OpndSize_16; 258 break; 259 } 260 case OpcodeByteKind_rd: 261 { 262 opnd_size = OpndSize_32; 263 break; 264 } 265 default: 266 opnd_size = OpndSize_32; // so there is no compiler warning 267 assert( false ); 268 } 269 opnd = EncoderBase::Operand( getRegName(OpndKind_GPReg, opnd_size, regid) ); 270 271 ++pinst->argc; 272 ++*pbuf; 273 return true; 274 } 275 case OpcodeByteKind_cb: 276 { 277 char offset = *(char*)*pbuf; 278 *pbuf += 1; 279 opnd = EncoderBase::Operand(offset); 280 ++pinst->argc; 281 //pinst->direct_addr = (void*)(pinst->offset + *pbuf); 282 } 283 return true; 284 case OpcodeByteKind_cw: 285 // not an error, but not expected in current env 286 // Android x86 287 { 288 short offset = *(short*)*pbuf; 289 *pbuf += 2; 290 opnd = EncoderBase::Operand(offset); 291 ++pinst->argc; 292 } 293 return true; 294 //return false; 295 case OpcodeByteKind_cd: 296 { 297 int offset = *(int*)*pbuf; 298 *pbuf += 4; 299 opnd = EncoderBase::Operand(offset); 300 ++pinst->argc; 301 } 302 return true; 303 case OpcodeByteKind_SlashNum: 304 { 305 const ModRM& modrm = *(ModRM*)*pbuf; 306 if (modrm.reg != byte) { 307 return false; 308 } 309 decodeModRM(odesc, pbuf, pinst 310 #ifdef _EM64T_ 311 , rex 312 #endif 313 ); 314 ++pinst->argc; 315 } 316 return true; 317 case OpcodeByteKind_ib: 318 { 319 char ival = *(char*)*pbuf; 320 opnd = EncoderBase::Operand(ival); 321 ++pinst->argc; 322 *pbuf += 1; 323 } 324 return true; 325 case OpcodeByteKind_iw: 326 { 327 short ival = *(short*)*pbuf; 328 opnd = EncoderBase::Operand(ival); 329 ++pinst->argc; 330 *pbuf += 2; 331 } 332 return true; 333 case OpcodeByteKind_id: 334 { 335 int ival = *(int*)*pbuf; 336 opnd = EncoderBase::Operand(ival); 337 ++pinst->argc; 338 *pbuf += 4; 339 } 340 return true; 341 #ifdef _EM64T_ 342 case OpcodeByteKind_io: 343 { 344 long long int ival = *(long long int*)*pbuf; 345 opnd = EncoderBase::Operand(OpndSize_64, ival); 346 ++pinst->argc; 347 *pbuf += 8; 348 } 349 return true; 350 #endif 351 case OpcodeByteKind_plus_i: 352 { 353 unsigned regid = data_byte - byte; 354 if (regid>7) { 355 return false; 356 } 357 ++*pbuf; 358 return true; 359 } 360 case OpcodeByteKind_ZeroOpcodeByte: // cant be here 361 return false; 362 default: 363 // unknown kind ? how comes ? 364 break; 365 } 366 return false; 367 } 368 369 bool DecoderBase::try_mn(Mnemonic mn, const unsigned char ** pbuf, Inst * pinst) { 370 const unsigned char * save_pbuf = *pbuf; 371 EncoderBase::OpcodeDesc * opcodes = EncoderBase::opcodes[mn]; 372 373 for (unsigned i=0; !opcodes[i].last; i++) { 374 const EncoderBase::OpcodeDesc& odesc = opcodes[i]; 375 char *opcode_ptr = const_cast<char *>(odesc.opcode); 376 int opcode_len = odesc.opcode_len; 377 #ifdef _EM64T_ 378 Rex *prex = NULL; 379 Rex rex; 380 #endif 381 382 *pbuf = save_pbuf; 383 #ifdef _EM64T_ 384 // Match REX prefixes 385 unsigned char rex_byte = (*pbuf)[0]; 386 if ((rex_byte & 0xf0) == 0x40) 387 { 388 if ((rex_byte & 0x08) != 0) 389 { 390 // Have REX.W 391 if (opcode_len > 0 && opcode_ptr[0] == 0x48) 392 { 393 // Have REX.W in opcode. All mnemonics that allow 394 // REX.W have to have specified it in opcode, 395 // otherwise it is not allowed 396 rex = *(Rex *)*pbuf; 397 prex = &rex; 398 (*pbuf)++; 399 opcode_ptr++; 400 opcode_len--; 401 } 402 } 403 else 404 { 405 // No REX.W, so it doesn't have to be in opcode. We 406 // have REX.B, REX.X, REX.R or their combination, but 407 // not in opcode, they may extend any part of the 408 // instruction 409 rex = *(Rex *)*pbuf; 410 prex = &rex; 411 (*pbuf)++; 412 } 413 } 414 #endif 415 if (opcode_len != 0) { 416 if (memcmp(*pbuf, opcode_ptr, opcode_len)) { 417 continue; 418 } 419 *pbuf += opcode_len; 420 } 421 if (odesc.aux0 != 0) { 422 423 if (!decode_aux(odesc, odesc.aux0, pbuf, pinst 424 #ifdef _EM64T_ 425 , prex 426 #endif 427 )) { 428 continue; 429 } 430 if (odesc.aux1 != 0) { 431 if (!decode_aux(odesc, odesc.aux1, pbuf, pinst 432 #ifdef _EM64T_ 433 , prex 434 #endif 435 )) { 436 continue; 437 } 438 } 439 pinst->odesc = &opcodes[i]; 440 return true; 441 } 442 else { 443 // Can't have empty opcode 444 assert(opcode_len != 0); 445 pinst->odesc = &opcodes[i]; 446 return true; 447 } 448 } 449 return false; 450 } 451 452 bool DecoderBase::decodeModRM(const EncoderBase::OpcodeDesc& odesc, 453 const unsigned char ** pbuf, Inst * pinst 454 #ifdef _EM64T_ 455 , const Rex *rex 456 #endif 457 ) 458 { 459 EncoderBase::Operand& opnd = pinst->operands[pinst->argc]; 460 const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc]; 461 462 //XXX debug ///assert(0x66 != *(*pbuf-2)); 463 const ModRM& modrm = *(ModRM*)*pbuf; 464 *pbuf += 1; 465 466 RegName base = RegName_Null; 467 RegName index = RegName_Null; 468 int disp = 0; 469 unsigned scale = 0; 470 471 // On x86_64 all mnemonics that allow REX.W have REX.W in opcode. 472 // Therefore REX.W is simply ignored, and opndDesc.size is used 473 474 if (modrm.mod == 3) { 475 // we have only modrm. no sib, no disp. 476 // Android x86: Use XMMReg for 64b operand. 477 OpndKind okind = ((opndDesc.kind & OpndKind_XMMReg) || opndDesc.size == OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg; 478 RegName reg = getRegName(okind, opndDesc.size, EXTEND_REG(modrm.rm, b)); 479 opnd = EncoderBase::Operand(reg); 480 return true; 481 } 482 //Android x86: m16, m32, m64: mean a byte[word|doubleword] operand in memory 483 //base and index should be 32 bits!!! 484 const SIB& sib = *(SIB*)*pbuf; 485 // check whether we have a sib 486 if (modrm.rm == 4) { 487 // yes, we have SIB 488 *pbuf += 1; 489 // scale = sib.scale == 0 ? 0 : (1<<sib.scale); 490 scale = (1<<sib.scale); 491 if (sib.index != 4) { 492 index = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(sib.index, x)); //Android x86: OpndDesc.size 493 } else { 494 // (sib.index == 4) => no index 495 //%esp can't be sib.index 496 } 497 498 if (sib.base != 5 || modrm.mod != 0) { 499 base = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(sib.base, b)); //Android x86: OpndDesc.size 500 } else { 501 // (sib.base == 5 && modrm.mod == 0) => no base 502 } 503 } 504 else { 505 if (modrm.mod != 0 || modrm.rm != 5) { 506 base = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(modrm.rm, b)); //Android x86: OpndDesc.size 507 } 508 else { 509 // mod=0 && rm == 5 => only disp32 510 } 511 } 512 513 //update disp and pbuf 514 if (modrm.mod == 2) { 515 // have disp32 516 disp = *(int*)*pbuf; 517 *pbuf += 4; 518 } 519 else if (modrm.mod == 1) { 520 // have disp8 521 disp = *(char*)*pbuf; 522 *pbuf += 1; 523 } 524 else { 525 assert(modrm.mod == 0); 526 if (modrm.rm == 5) { 527 // have disp32 w/o sib 528 disp = *(int*)*pbuf; 529 *pbuf += 4; 530 } 531 else if (modrm.rm == 4 && sib.base == 5) { 532 // have disp32 with SI in sib 533 disp = *(int*)*pbuf; 534 *pbuf += 4; 535 } 536 } 537 opnd = EncoderBase::Operand(opndDesc.size, base, index, scale, disp); 538 return true; 539 } 540 541