1 //===- subzero/src/IceASanInstrumentation.cpp - ASan ------------*- C++ -*-===// 2 // 3 // The Subzero Code Generator 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief Implements the AddressSanitizer instrumentation class. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "IceASanInstrumentation.h" 16 17 #include "IceBuildDefs.h" 18 #include "IceCfg.h" 19 #include "IceCfgNode.h" 20 #include "IceGlobalInits.h" 21 #include "IceInst.h" 22 #include "IceTargetLowering.h" 23 #include "IceTypes.h" 24 25 #include <sstream> 26 #include <unordered_map> 27 #include <unordered_set> 28 #include <vector> 29 30 namespace Ice { 31 32 namespace { 33 34 constexpr SizeT BytesPerWord = sizeof(uint32_t); 35 constexpr SizeT RzSize = 32; 36 constexpr SizeT ShadowScaleLog2 = 3; 37 constexpr SizeT ShadowScale = 1 << ShadowScaleLog2; 38 constexpr SizeT ShadowLength32 = 1 << (32 - ShadowScaleLog2); 39 constexpr int32_t StackPoisonVal = -1; 40 constexpr const char *ASanPrefix = "__asan"; 41 constexpr const char *RzPrefix = "__$rz"; 42 constexpr const char *RzArrayName = "__$rz_array"; 43 constexpr const char *RzSizesName = "__$rz_sizes"; 44 const llvm::NaClBitcodeRecord::RecordVector RzContents = 45 llvm::NaClBitcodeRecord::RecordVector(RzSize, 'R'); 46 47 // In order to instrument the code correctly, the .pexe must not have had its 48 // symbols stripped. 49 using StringMap = std::unordered_map<std::string, std::string>; 50 using StringSet = std::unordered_set<std::string>; 51 // TODO(tlively): Handle all allocation functions 52 const StringMap FuncSubstitutions = {{"malloc", "__asan_malloc"}, 53 {"free", "__asan_free"}, 54 {"calloc", "__asan_calloc"}, 55 {"__asan_dummy_calloc", "__asan_calloc"}, 56 {"realloc", "__asan_realloc"}}; 57 const StringSet FuncBlackList = {"_Balloc"}; 58 59 llvm::NaClBitcodeRecord::RecordVector sizeToByteVec(SizeT Size) { 60 llvm::NaClBitcodeRecord::RecordVector SizeContents; 61 for (unsigned i = 0; i < sizeof(Size); ++i) { 62 SizeContents.emplace_back(Size % (1 << CHAR_BIT)); 63 Size >>= CHAR_BIT; 64 } 65 return SizeContents; 66 } 67 68 } // end of anonymous namespace 69 70 ICE_TLS_DEFINE_FIELD(VarSizeMap *, ASanInstrumentation, LocalVars); 71 ICE_TLS_DEFINE_FIELD(std::vector<InstStore *> *, ASanInstrumentation, 72 LocalDtors); 73 ICE_TLS_DEFINE_FIELD(CfgNode *, ASanInstrumentation, CurNode); 74 ICE_TLS_DEFINE_FIELD(VarSizeMap *, ASanInstrumentation, CheckedVars); 75 76 bool ASanInstrumentation::isInstrumentable(Cfg *Func) { 77 std::string FuncName = Func->getFunctionName().toStringOrEmpty(); 78 return FuncName == "" || 79 (FuncBlackList.count(FuncName) == 0 && FuncName.find(ASanPrefix) != 0); 80 } 81 82 // Create redzones around all global variables, ensuring that the initializer 83 // types of the redzones and their associated globals match so that they are 84 // laid out together in memory. 85 void ASanInstrumentation::instrumentGlobals(VariableDeclarationList &Globals) { 86 std::unique_lock<std::mutex> _(GlobalsMutex); 87 if (DidProcessGlobals) 88 return; 89 VariableDeclarationList NewGlobals; 90 // Global holding pointers to all redzones 91 auto *RzArray = VariableDeclaration::create(&NewGlobals); 92 // Global holding sizes of all redzones 93 auto *RzSizes = VariableDeclaration::create(&NewGlobals); 94 95 RzArray->setName(Ctx, RzArrayName); 96 RzSizes->setName(Ctx, RzSizesName); 97 RzArray->setIsConstant(true); 98 RzSizes->setIsConstant(true); 99 NewGlobals.push_back(RzArray); 100 NewGlobals.push_back(RzSizes); 101 102 using PrototypeMap = std::unordered_map<std::string, FunctionDeclaration *>; 103 PrototypeMap ProtoSubstitutions; 104 for (VariableDeclaration *Global : Globals) { 105 assert(Global->getAlignment() <= RzSize); 106 VariableDeclaration *RzLeft = VariableDeclaration::create(&NewGlobals); 107 VariableDeclaration *NewGlobal = Global; 108 VariableDeclaration *RzRight = VariableDeclaration::create(&NewGlobals); 109 RzLeft->setName(Ctx, nextRzName()); 110 RzRight->setName(Ctx, nextRzName()); 111 SizeT Alignment = std::max(RzSize, Global->getAlignment()); 112 SizeT RzLeftSize = Alignment; 113 SizeT RzRightSize = 114 RzSize + Utils::OffsetToAlignment(Global->getNumBytes(), Alignment); 115 if (!Global->hasNonzeroInitializer()) { 116 RzLeft->addInitializer(VariableDeclaration::ZeroInitializer::create( 117 &NewGlobals, RzLeftSize)); 118 RzRight->addInitializer(VariableDeclaration::ZeroInitializer::create( 119 &NewGlobals, RzRightSize)); 120 } else { 121 RzLeft->addInitializer(VariableDeclaration::DataInitializer::create( 122 &NewGlobals, llvm::NaClBitcodeRecord::RecordVector(RzLeftSize, 'R'))); 123 RzRight->addInitializer(VariableDeclaration::DataInitializer::create( 124 &NewGlobals, 125 llvm::NaClBitcodeRecord::RecordVector(RzRightSize, 'R'))); 126 127 // replace any pointers to allocator functions 128 NewGlobal = VariableDeclaration::create(&NewGlobals); 129 NewGlobal->setName(Global->getName()); 130 std::vector<VariableDeclaration::Initializer *> GlobalInits = 131 Global->getInitializers(); 132 for (VariableDeclaration::Initializer *Init : GlobalInits) { 133 auto *RelocInit = 134 llvm::dyn_cast<VariableDeclaration::RelocInitializer>(Init); 135 if (RelocInit == nullptr) { 136 NewGlobal->addInitializer(Init); 137 continue; 138 } 139 const GlobalDeclaration *TargetDecl = RelocInit->getDeclaration(); 140 const auto *TargetFunc = 141 llvm::dyn_cast<FunctionDeclaration>(TargetDecl); 142 if (TargetFunc == nullptr) { 143 NewGlobal->addInitializer(Init); 144 continue; 145 } 146 std::string TargetName = TargetDecl->getName().toStringOrEmpty(); 147 StringMap::const_iterator Subst = FuncSubstitutions.find(TargetName); 148 if (Subst == FuncSubstitutions.end()) { 149 NewGlobal->addInitializer(Init); 150 continue; 151 } 152 std::string SubstName = Subst->second; 153 PrototypeMap::iterator SubstProtoEntry = 154 ProtoSubstitutions.find(SubstName); 155 FunctionDeclaration *SubstProto; 156 if (SubstProtoEntry != ProtoSubstitutions.end()) 157 SubstProto = SubstProtoEntry->second; 158 else { 159 constexpr bool IsProto = true; 160 SubstProto = FunctionDeclaration::create( 161 Ctx, TargetFunc->getSignature(), TargetFunc->getCallingConv(), 162 llvm::GlobalValue::ExternalLinkage, IsProto); 163 SubstProto->setName(Ctx, SubstName); 164 ProtoSubstitutions.insert({SubstName, SubstProto}); 165 } 166 167 NewGlobal->addInitializer(VariableDeclaration::RelocInitializer::create( 168 &NewGlobals, SubstProto, RelocOffsetArray(0))); 169 } 170 } 171 172 RzLeft->setIsConstant(Global->getIsConstant()); 173 NewGlobal->setIsConstant(Global->getIsConstant()); 174 RzRight->setIsConstant(Global->getIsConstant()); 175 RzLeft->setAlignment(Alignment); 176 NewGlobal->setAlignment(Alignment); 177 RzRight->setAlignment(1); 178 RzArray->addInitializer(VariableDeclaration::RelocInitializer::create( 179 &NewGlobals, RzLeft, RelocOffsetArray(0))); 180 RzArray->addInitializer(VariableDeclaration::RelocInitializer::create( 181 &NewGlobals, RzRight, RelocOffsetArray(0))); 182 RzSizes->addInitializer(VariableDeclaration::DataInitializer::create( 183 &NewGlobals, sizeToByteVec(RzLeftSize))); 184 RzSizes->addInitializer(VariableDeclaration::DataInitializer::create( 185 &NewGlobals, sizeToByteVec(RzRightSize))); 186 187 NewGlobals.push_back(RzLeft); 188 NewGlobals.push_back(NewGlobal); 189 NewGlobals.push_back(RzRight); 190 RzGlobalsNum += 2; 191 192 GlobalSizes.insert({NewGlobal->getName(), NewGlobal->getNumBytes()}); 193 } 194 195 // Replace old list of globals, without messing up arena allocators 196 Globals.clear(); 197 Globals.merge(&NewGlobals); 198 DidProcessGlobals = true; 199 200 // Log the new set of globals 201 if (BuildDefs::dump() && (getFlags().getVerbose() & IceV_GlobalInit)) { 202 OstreamLocker _(Ctx); 203 Ctx->getStrDump() << "========= Instrumented Globals =========\n"; 204 for (VariableDeclaration *Global : Globals) { 205 Global->dump(Ctx->getStrDump()); 206 } 207 } 208 } 209 210 std::string ASanInstrumentation::nextRzName() { 211 std::stringstream Name; 212 Name << RzPrefix << RzNum++; 213 return Name.str(); 214 } 215 216 // Check for an alloca signaling the presence of local variables and add a 217 // redzone if it is found 218 void ASanInstrumentation::instrumentFuncStart(LoweringContext &Context) { 219 if (ICE_TLS_GET_FIELD(LocalDtors) == nullptr) { 220 ICE_TLS_SET_FIELD(LocalDtors, new std::vector<InstStore *>()); 221 ICE_TLS_SET_FIELD(LocalVars, new VarSizeMap()); 222 } 223 Cfg *Func = Context.getNode()->getCfg(); 224 using Entry = std::pair<SizeT, int32_t>; 225 std::vector<InstAlloca *> NewAllocas; 226 std::vector<Entry> PoisonVals; 227 Variable *FirstShadowLocVar; 228 InstArithmetic *ShadowIndexCalc; 229 InstArithmetic *ShadowLocCalc; 230 InstAlloca *Cur; 231 ConstantInteger32 *VarSizeOp; 232 while (!Context.atEnd()) { 233 Cur = llvm::dyn_cast<InstAlloca>(iteratorToInst(Context.getCur())); 234 VarSizeOp = (Cur == nullptr) 235 ? nullptr 236 : llvm::dyn_cast<ConstantInteger32>(Cur->getSizeInBytes()); 237 if (Cur == nullptr || VarSizeOp == nullptr) { 238 Context.advanceCur(); 239 Context.advanceNext(); 240 continue; 241 } 242 243 Cur->setDeleted(); 244 245 if (PoisonVals.empty()) { 246 // insert leftmost redzone 247 auto *LastRzVar = Func->makeVariable(IceType_i32); 248 LastRzVar->setName(Func, nextRzName()); 249 auto *ByteCount = ConstantInteger32::create(Ctx, IceType_i32, RzSize); 250 constexpr SizeT Alignment = 8; 251 NewAllocas.emplace_back( 252 InstAlloca::create(Func, LastRzVar, ByteCount, Alignment)); 253 PoisonVals.emplace_back(Entry{RzSize >> ShadowScaleLog2, StackPoisonVal}); 254 255 // Calculate starting address for poisoning 256 FirstShadowLocVar = Func->makeVariable(IceType_i32); 257 FirstShadowLocVar->setName(Func, "firstShadowLoc"); 258 auto *ShadowIndexVar = Func->makeVariable(IceType_i32); 259 ShadowIndexVar->setName(Func, "shadowIndex"); 260 261 auto *ShadowScaleLog2Const = 262 ConstantInteger32::create(Ctx, IceType_i32, ShadowScaleLog2); 263 auto *ShadowMemLocConst = 264 ConstantInteger32::create(Ctx, IceType_i32, ShadowLength32); 265 266 ShadowIndexCalc = 267 InstArithmetic::create(Func, InstArithmetic::Lshr, ShadowIndexVar, 268 LastRzVar, ShadowScaleLog2Const); 269 ShadowLocCalc = 270 InstArithmetic::create(Func, InstArithmetic::Add, FirstShadowLocVar, 271 ShadowIndexVar, ShadowMemLocConst); 272 } 273 274 // create the new alloca that includes a redzone 275 SizeT VarSize = VarSizeOp->getValue(); 276 Variable *Dest = Cur->getDest(); 277 ICE_TLS_GET_FIELD(LocalVars)->insert({Dest, VarSize}); 278 SizeT RzPadding = RzSize + Utils::OffsetToAlignment(VarSize, RzSize); 279 auto *ByteCount = 280 ConstantInteger32::create(Ctx, IceType_i32, VarSize + RzPadding); 281 constexpr SizeT Alignment = 8; 282 NewAllocas.emplace_back( 283 InstAlloca::create(Func, Dest, ByteCount, Alignment)); 284 285 const SizeT Zeros = VarSize >> ShadowScaleLog2; 286 const SizeT Offset = VarSize % ShadowScale; 287 const SizeT PoisonBytes = 288 ((VarSize + RzPadding) >> ShadowScaleLog2) - Zeros - 1; 289 if (Zeros > 0) 290 PoisonVals.emplace_back(Entry{Zeros, 0}); 291 PoisonVals.emplace_back(Entry{1, (Offset == 0) ? StackPoisonVal : Offset}); 292 PoisonVals.emplace_back(Entry{PoisonBytes, StackPoisonVal}); 293 Context.advanceCur(); 294 Context.advanceNext(); 295 } 296 297 Context.rewind(); 298 if (PoisonVals.empty()) { 299 Context.advanceNext(); 300 return; 301 } 302 for (InstAlloca *RzAlloca : NewAllocas) { 303 Context.insert(RzAlloca); 304 } 305 Context.insert(ShadowIndexCalc); 306 Context.insert(ShadowLocCalc); 307 308 // Poison redzones 309 std::vector<Entry>::iterator Iter = PoisonVals.begin(); 310 for (SizeT Offset = 0; Iter != PoisonVals.end(); Offset += BytesPerWord) { 311 int32_t CurVals[BytesPerWord] = {0}; 312 for (uint32_t i = 0; i < BytesPerWord; ++i) { 313 if (Iter == PoisonVals.end()) 314 break; 315 Entry Val = *Iter; 316 CurVals[i] = Val.second; 317 --Val.first; 318 if (Val.first > 0) 319 *Iter = Val; 320 else 321 ++Iter; 322 } 323 int32_t Poison = ((CurVals[3] & 0xff) << 24) | ((CurVals[2] & 0xff) << 16) | 324 ((CurVals[1] & 0xff) << 8) | (CurVals[0] & 0xff); 325 if (Poison == 0) 326 continue; 327 auto *PoisonConst = ConstantInteger32::create(Ctx, IceType_i32, Poison); 328 auto *ZeroConst = ConstantInteger32::create(Ctx, IceType_i32, 0); 329 auto *OffsetConst = ConstantInteger32::create(Ctx, IceType_i32, Offset); 330 auto *PoisonAddrVar = Func->makeVariable(IceType_i32); 331 Context.insert(InstArithmetic::create(Func, InstArithmetic::Add, 332 PoisonAddrVar, FirstShadowLocVar, 333 OffsetConst)); 334 Context.insert(InstStore::create(Func, PoisonConst, PoisonAddrVar)); 335 ICE_TLS_GET_FIELD(LocalDtors) 336 ->emplace_back(InstStore::create(Func, ZeroConst, PoisonAddrVar)); 337 } 338 Context.advanceNext(); 339 } 340 341 void ASanInstrumentation::instrumentCall(LoweringContext &Context, 342 InstCall *Instr) { 343 auto *CallTarget = 344 llvm::dyn_cast<ConstantRelocatable>(Instr->getCallTarget()); 345 if (CallTarget == nullptr) 346 return; 347 348 std::string TargetName = CallTarget->getName().toStringOrEmpty(); 349 auto Subst = FuncSubstitutions.find(TargetName); 350 if (Subst == FuncSubstitutions.end()) 351 return; 352 353 std::string SubName = Subst->second; 354 Constant *NewFunc = Ctx->getConstantExternSym(Ctx->getGlobalString(SubName)); 355 auto *NewCall = 356 InstCall::create(Context.getNode()->getCfg(), Instr->getNumArgs(), 357 Instr->getDest(), NewFunc, Instr->isTailcall()); 358 for (SizeT I = 0, Args = Instr->getNumArgs(); I < Args; ++I) 359 NewCall->addArg(Instr->getArg(I)); 360 Context.insert(NewCall); 361 Instr->setDeleted(); 362 } 363 364 void ASanInstrumentation::instrumentLoad(LoweringContext &Context, 365 InstLoad *Instr) { 366 Operand *Src = Instr->getSourceAddress(); 367 if (auto *Reloc = llvm::dyn_cast<ConstantRelocatable>(Src)) { 368 auto *NewLoad = InstLoad::create(Context.getNode()->getCfg(), 369 Instr->getDest(), instrumentReloc(Reloc)); 370 Instr->setDeleted(); 371 Context.insert(NewLoad); 372 Instr = NewLoad; 373 } 374 Constant *Func = 375 Ctx->getConstantExternSym(Ctx->getGlobalString("__asan_check_load")); 376 instrumentAccess(Context, Instr->getSourceAddress(), 377 typeWidthInBytes(Instr->getDest()->getType()), Func); 378 } 379 380 void ASanInstrumentation::instrumentStore(LoweringContext &Context, 381 InstStore *Instr) { 382 Operand *Data = Instr->getData(); 383 if (auto *Reloc = llvm::dyn_cast<ConstantRelocatable>(Data)) { 384 auto *NewStore = InstStore::create( 385 Context.getNode()->getCfg(), instrumentReloc(Reloc), Instr->getAddr()); 386 Instr->setDeleted(); 387 Context.insert(NewStore); 388 Instr = NewStore; 389 } 390 Constant *Func = 391 Ctx->getConstantExternSym(Ctx->getGlobalString("__asan_check_store")); 392 instrumentAccess(Context, Instr->getAddr(), 393 typeWidthInBytes(Instr->getData()->getType()), Func); 394 } 395 396 ConstantRelocatable * 397 ASanInstrumentation::instrumentReloc(ConstantRelocatable *Reloc) { 398 std::string DataName = Reloc->getName().toString(); 399 StringMap::const_iterator DataSub = FuncSubstitutions.find(DataName); 400 if (DataSub != FuncSubstitutions.end()) { 401 return ConstantRelocatable::create( 402 Ctx, Reloc->getType(), 403 RelocatableTuple(Reloc->getOffset(), RelocOffsetArray(0), 404 Ctx->getGlobalString(DataSub->second), 405 Reloc->getEmitString())); 406 } 407 return Reloc; 408 } 409 410 void ASanInstrumentation::instrumentAccess(LoweringContext &Context, 411 Operand *Op, SizeT Size, 412 Constant *CheckFunc) { 413 // Skip redundant checks within basic blocks 414 VarSizeMap *Checked = ICE_TLS_GET_FIELD(CheckedVars); 415 if (ICE_TLS_GET_FIELD(CurNode) != Context.getNode()) { 416 ICE_TLS_SET_FIELD(CurNode, Context.getNode()); 417 if (Checked == NULL) { 418 Checked = new VarSizeMap(); 419 ICE_TLS_SET_FIELD(CheckedVars, Checked); 420 } 421 Checked->clear(); 422 } 423 VarSizeMap::iterator PrevCheck = Checked->find(Op); 424 if (PrevCheck != Checked->end() && PrevCheck->second >= Size) 425 return; 426 else 427 Checked->insert({Op, Size}); 428 429 // check for known good local access 430 VarSizeMap::iterator LocalSize = ICE_TLS_GET_FIELD(LocalVars)->find(Op); 431 if (LocalSize != ICE_TLS_GET_FIELD(LocalVars)->end() && 432 LocalSize->second >= Size) 433 return; 434 if (isOkGlobalAccess(Op, Size)) 435 return; 436 constexpr SizeT NumArgs = 2; 437 constexpr Variable *Void = nullptr; 438 constexpr bool NoTailCall = false; 439 auto *Call = InstCall::create(Context.getNode()->getCfg(), NumArgs, Void, 440 CheckFunc, NoTailCall); 441 Call->addArg(Op); 442 Call->addArg(ConstantInteger32::create(Ctx, IceType_i32, Size)); 443 // play games to insert the call before the access instruction 444 InstList::iterator Next = Context.getNext(); 445 Context.setInsertPoint(Context.getCur()); 446 Context.insert(Call); 447 Context.setNext(Next); 448 } 449 450 // TODO(tlively): Trace back load and store addresses to find their real offsets 451 bool ASanInstrumentation::isOkGlobalAccess(Operand *Op, SizeT Size) { 452 auto *Reloc = llvm::dyn_cast<ConstantRelocatable>(Op); 453 if (Reloc == nullptr) 454 return false; 455 RelocOffsetT Offset = Reloc->getOffset(); 456 GlobalSizeMap::iterator GlobalSize = GlobalSizes.find(Reloc->getName()); 457 return GlobalSize != GlobalSizes.end() && GlobalSize->second - Offset >= Size; 458 } 459 460 void ASanInstrumentation::instrumentRet(LoweringContext &Context, InstRet *) { 461 Cfg *Func = Context.getNode()->getCfg(); 462 Context.setInsertPoint(Context.getCur()); 463 for (InstStore *RzUnpoison : *ICE_TLS_GET_FIELD(LocalDtors)) { 464 Context.insert( 465 InstStore::create(Func, RzUnpoison->getData(), RzUnpoison->getAddr())); 466 } 467 Context.advanceCur(); 468 Context.advanceNext(); 469 } 470 471 void ASanInstrumentation::instrumentStart(Cfg *Func) { 472 Constant *ShadowMemInit = 473 Ctx->getConstantExternSym(Ctx->getGlobalString("__asan_init")); 474 constexpr SizeT NumArgs = 3; 475 constexpr Variable *Void = nullptr; 476 constexpr bool NoTailCall = false; 477 auto *Call = InstCall::create(Func, NumArgs, Void, ShadowMemInit, NoTailCall); 478 Func->getEntryNode()->getInsts().push_front(Call); 479 480 instrumentGlobals(*getGlobals()); 481 482 Call->addArg(ConstantInteger32::create(Ctx, IceType_i32, RzGlobalsNum)); 483 Call->addArg(Ctx->getConstantSym(0, Ctx->getGlobalString(RzArrayName))); 484 Call->addArg(Ctx->getConstantSym(0, Ctx->getGlobalString(RzSizesName))); 485 } 486 487 // TODO(tlively): make this more efficient with swap idiom 488 void ASanInstrumentation::finishFunc(Cfg *) { 489 ICE_TLS_GET_FIELD(LocalVars)->clear(); 490 ICE_TLS_GET_FIELD(LocalDtors)->clear(); 491 } 492 493 } // end of namespace Ice 494