//=======- PaddingChecker.cpp ------------------------------------*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a checker that checks for padding that could be
// removed by re-ordering members.
//
//===----------------------------------------------------------------------===//

#include "ClangSACheckers.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <numeric>

using namespace clang;
using namespace ento;

namespace {
/// AST-level checker that reports record types (and arrays of record types)
/// whose layout contains more padding than an optimal reordering of the
/// fields would produce, beyond a configurable "AllowedPad" threshold.
class PaddingChecker : public Checker<check::ASTDecl<TranslationUnitDecl>> {
private:
  // Lazily created in reportRecord(); shared by every report this checker
  // emits.
  mutable std::unique_ptr<BugType> PaddingBug;
  // Number of excess padding bytes tolerated before warning. Read from the
  // "AllowedPad" analyzer option (default 24) in checkASTDecl().
  mutable int64_t AllowedPad;
  // Reporter captured in checkASTDecl() so the visit* helpers (called from
  // the local visitor below) can emit reports and query the SourceManager.
  mutable BugReporter *BR;

public:
  /// Entry point. Walks the entire translation unit with a private
  /// RecursiveASTVisitor and inspects every record and variable declaration.
  void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR,
                    BugReporter &BRArg) const {
    BR = &BRArg;
    AllowedPad =
        MGR.getAnalyzerOptions().getOptionAsInteger("AllowedPad", 24, this);
    assert(AllowedPad >= 0 && "AllowedPad option should be non-negative");

    // The calls to checkAST* from AnalysisConsumer don't
    // visit template instantiations or lambda classes. We
    // want to visit those, so we make our own RecursiveASTVisitor.
    struct LocalVisitor : public RecursiveASTVisitor<LocalVisitor> {
      const PaddingChecker *Checker;
      // Opt in to the declarations the default AST traversal skips.
      bool shouldVisitTemplateInstantiations() const { return true; }
      bool shouldVisitImplicitCode() const { return true; }
      explicit LocalVisitor(const PaddingChecker *Checker) : Checker(Checker) {}
      bool VisitRecordDecl(const RecordDecl *RD) {
        Checker->visitRecord(RD);
        return true;
      }
      bool VisitVarDecl(const VarDecl *VD) {
        Checker->visitVariable(VD);
        return true;
      }
      // TODO: Visit array new and mallocs for arrays.
    };

    LocalVisitor visitor(this);
    // TraverseDecl takes a non-const Decl; the visitor never mutates it.
    visitor.TraverseDecl(const_cast<TranslationUnitDecl *>(TUD));
  }

  /// \brief Look for records of overly padded types. If padding *
  /// PadMultiplier exceeds AllowedPad, then generate a report.
  /// PadMultiplier is used to share code with the array padding
  /// checker: it is the element count when the record is an array element
  /// type, and 1 when the record is checked on its own.
  void visitRecord(const RecordDecl *RD, uint64_t PadMultiplier = 1) const {
    if (shouldSkipDecl(RD))
      return;

    auto &ASTContext = RD->getASTContext();
    const ASTRecordLayout &RL = ASTContext.getASTRecordLayout(RD);
    assert(llvm::isPowerOf2_64(RL.getAlignment().getQuantity()));

    CharUnits BaselinePad = calculateBaselinePad(RD, ASTContext, RL);
    if (BaselinePad.isZero())
      return;
    CharUnits OptimalPad = calculateOptimalPad(RD, ASTContext, RL);

    CharUnits DiffPad = PadMultiplier * (BaselinePad - OptimalPad);
    if (DiffPad.getQuantity() <= AllowedPad) {
      assert(!DiffPad.isNegative() && "DiffPad should not be negative");
      // There is not enough excess padding to trigger a warning.
      return;
    }
    reportRecord(RD, BaselinePad, OptimalPad);
  }

  /// \brief Look for arrays of overly padded types. If the padding of the
  /// array type exceeds AllowedPad, then generate a report.
  void visitVariable(const VarDecl *VD) const {
    const ArrayType *ArrTy = VD->getType()->getAsArrayTypeUnsafe();
    if (ArrTy == nullptr)
      return;
    uint64_t Elts = 0;
    if (const ConstantArrayType *CArrTy = dyn_cast<ConstantArrayType>(ArrTy))
      Elts = CArrTy->getSize().getZExtValue();
    if (Elts == 0)
      return;
    const RecordType *RT = ArrTy->getElementType()->getAs<RecordType>();
    if (RT == nullptr)
      return;

    // TODO: Recurse into the fields and base classes to see if any
    // of those have excess padding.
    visitRecord(RT->getDecl(), Elts);
  }

  /// \brief Returns true for declarations this checker cannot analyze or
  /// deliberately ignores: declarations without a source location or from
  /// system headers, unions, field-less records, classes with (virtual)
  /// bases, dependent templates, and records containing bitfields or
  /// incomplete (variable length) array members.
  bool shouldSkipDecl(const RecordDecl *RD) const {
    auto Location = RD->getLocation();
    // If the construct doesn't have a source file, then it's not something
    // we want to diagnose.
    if (!Location.isValid())
      return true;
    SrcMgr::CharacteristicKind Kind =
        BR->getSourceManager().getFileCharacteristic(Location);
    // Throw out all records that come from system headers.
    if (Kind != SrcMgr::C_User)
      return true;

    // Not going to attempt to optimize unions.
    if (RD->isUnion())
      return true;
    // How do you reorder fields if you haven't got any?
    if (RD->field_empty())
      return true;
    if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
      // Tail padding with base classes ends up being very complicated.
      // We will skip objects with base classes for now.
      if (CXXRD->getNumBases() != 0)
        return true;
      // Virtual bases are complicated, skipping those for now.
      if (CXXRD->getNumVBases() != 0)
        return true;
      // Can't layout a template, so skip it. We do still layout the
      // instantiations though.
      if (CXXRD->getTypeForDecl()->isDependentType())
        return true;
      if (CXXRD->getTypeForDecl()->isInstantiationDependentType())
        return true;
    }
    auto IsTrickyField = [](const FieldDecl *FD) -> bool {
      // Bitfield layout is hard.
      if (FD->isBitField())
        return true;

      // Variable length arrays are tricky too.
      QualType Ty = FD->getType();
      if (Ty->isIncompleteArrayType())
        return true;
      return false;
    };

    if (std::any_of(RD->field_begin(), RD->field_end(), IsTrickyField))
      return true;
    return false;
  }

  /// \brief Sum the padding (in chars) present in the record's actual
  /// layout: the gaps between consecutive fields plus the record's tail
  /// padding. Padding inside base subobjects before the first field is
  /// intentionally excluded (Offset starts at the first field's offset).
  static CharUnits calculateBaselinePad(const RecordDecl *RD,
                                        const ASTContext &ASTContext,
                                        const ASTRecordLayout &RL) {
    CharUnits PaddingSum;
    // Safe to query field 0: shouldSkipDecl rejects field-less records.
    CharUnits Offset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0));
    for (const auto &FD : RD->fields()) {
      // This checker only cares about the padded size of the
      // field, and not the data size. If the field is a record
      // with tail padding, then we won't put that number in our
      // total because reordering fields won't fix that problem.
      CharUnits FieldSize = ASTContext.getTypeSizeInChars(FD->getType());
      auto FieldOffsetBits = RL.getFieldOffset(FD->getFieldIndex());
      CharUnits FieldOffset = ASTContext.toCharUnitsFromBits(FieldOffsetBits);
      PaddingSum += (FieldOffset - Offset);
      Offset = FieldOffset + FieldSize;
    }
    PaddingSum += RL.getSize() - Offset;
    return PaddingSum;
  }

  /// Optimal padding overview:
  /// 1. Find a close approximation to where we can place our first field.
  ///    This will usually be at offset 0.
  /// 2. Try to find the best field that can legally be placed at the current
  ///    offset.
  ///   a. "Best" is the largest alignment that is legal, but smallest size.
  ///      This is to account for overly aligned types.
  /// 3. If no fields can fit, pad by rounding the current offset up to the
  ///    smallest alignment requirement of our fields. Measure and track the
  ///    amount of padding added. Go back to 2.
  /// 4. Increment the current offset by the size of the chosen field.
  /// 5. Remove the chosen field from the set of future possibilities.
  /// 6. Go back to 2 if there are still unplaced fields.
  /// 7. Add tail padding by rounding the current offset up to the structure
  ///    alignment. Track the amount of padding added.

  static CharUnits calculateOptimalPad(const RecordDecl *RD,
                                       const ASTContext &ASTContext,
                                       const ASTRecordLayout &RL) {
    struct CharUnitPair {
      CharUnits Align;
      CharUnits Size;
      bool operator<(const CharUnitPair &RHS) const {
        // Order from small alignments to large alignments,
        // then large sizes to small sizes.
        return std::make_pair(Align, -Size) <
               std::make_pair(RHS.Align, -RHS.Size);
      }
    };
    SmallVector<CharUnitPair, 20> Fields;
    auto GatherSizesAndAlignments = [](const FieldDecl *FD) {
      CharUnitPair RetVal;
      auto &Ctx = FD->getASTContext();
      std::tie(RetVal.Size, RetVal.Align) =
          Ctx.getTypeInfoInChars(FD->getType());
      assert(llvm::isPowerOf2_64(RetVal.Align.getQuantity()));
      // An explicit alignment attribute can raise (never lower) alignment.
      if (auto Max = FD->getMaxAlignment())
        RetVal.Align = std::max(Ctx.toCharUnitsFromBits(Max), RetVal.Align);
      return RetVal;
    };
    std::transform(RD->field_begin(), RD->field_end(),
                   std::back_inserter(Fields), GatherSizesAndAlignments);
    std::sort(Fields.begin(), Fields.end());

    // This lets us skip over vptrs and non-virtual bases,
    // so that we can just worry about the fields in our object.
    // Note that this does cause us to miss some cases where we
    // could pack more bytes in to a base class's tail padding.
    CharUnits NewOffset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0));
    CharUnits NewPad;

    while (!Fields.empty()) {
      // The largest alignment we can currently satisfy is determined by the
      // lowest set bit of the offset.
      unsigned TrailingZeros =
          llvm::countTrailingZeros((unsigned long long)NewOffset.getQuantity());
      // If NewOffset is zero, then countTrailingZeros will be 64. Shifting
      // 64 will overflow our unsigned long long. Shifting 63 will turn
      // our long long (and CharUnits internal type) negative. So shift 62.
      long long CurAlignmentBits = 1ull << (std::min)(TrailingZeros, 62u);
      CharUnits CurAlignment = CharUnits::fromQuantity(CurAlignmentBits);
      CharUnitPair InsertPoint = {CurAlignment, CharUnits::Zero()};
      auto CurBegin = Fields.begin();
      auto CurEnd = Fields.end();

      // In the typical case, this will find the last element
      // of the vector. We won't find a middle element unless
      // we started on a poorly aligned address or have an overly
      // aligned field.
      auto Iter = std::upper_bound(CurBegin, CurEnd, InsertPoint);
      if (Iter != CurBegin) {
        // We found a field that we can layout with the current alignment.
        --Iter;
        NewOffset += Iter->Size;
        Fields.erase(Iter);
      } else {
        // We are poorly aligned, and we need to pad in order to layout another
        // field. Round up to at least the smallest field alignment that we
        // currently have. (Fields[0] holds the smallest alignment because the
        // vector is sorted by ascending alignment.)
        CharUnits NextOffset = NewOffset.RoundUpToAlignment(Fields[0].Align);
        NewPad += NextOffset - NewOffset;
        NewOffset = NextOffset;
      }
    }
    // Calculate tail padding.
    CharUnits NewSize = NewOffset.RoundUpToAlignment(RL.getAlignment());
    NewPad += NewSize - NewOffset;
    return NewPad;
  }

  /// \brief Emit an "Excessive Padding" performance report for \p RD,
  /// showing the actual (\p BaselinePad) and achievable (\p TargetPad)
  /// padding byte counts.
  void reportRecord(const RecordDecl *RD, CharUnits BaselinePad,
                    CharUnits TargetPad) const {
    if (!PaddingBug)
      PaddingBug =
          llvm::make_unique<BugType>(this, "Excessive Padding", "Performance");

    SmallString<100> Buf;
    llvm::raw_svector_ostream Os(Buf);

    Os << "Excessive padding in '";
    Os << QualType::getAsString(RD->getTypeForDecl(), Qualifiers()) << "'";

    if (auto *TSD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) {
      // TODO: make this show up better in the console output and in
      // the HTML. Maybe just make it show up in HTML like the path
      // diagnostics show.
      SourceLocation ILoc = TSD->getPointOfInstantiation();
      if (ILoc.isValid())
        Os << " instantiated here: "
           << ILoc.printToString(BR->getSourceManager());
    }

    Os << " (" << BaselinePad.getQuantity() << " padding bytes, where "
       << TargetPad.getQuantity() << " is optimal). Consider reordering "
       << "the fields or adding explicit padding members.";

    PathDiagnosticLocation CELoc =
        PathDiagnosticLocation::create(RD, BR->getSourceManager());

    auto Report = llvm::make_unique<BugReport>(*PaddingBug, Os.str(), CELoc);
    Report->setDeclWithIssue(RD);
    Report->addRange(RD->getSourceRange());

    BR->emitReport(std::move(Report));
  }
};
}

// Registration hook called by the checker manager.
void ento::registerPaddingChecker(CheckerManager &Mgr) {
  Mgr.registerChecker<PaddingChecker>();
}