Home | History | Annotate | Download | only in llvm-extract
      1 //===- llvm-extract.cpp - LLVM function extraction utility ----------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
// This utility changes the input module to only contain the selected
// functions, global variables and aliases (or, with -delete, to contain
// everything except them). It is primarily used for debugging transformations.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "llvm/ADT/SetVector.h"
     16 #include "llvm/ADT/SmallPtrSet.h"
     17 #include "llvm/Bitcode/BitcodeWriterPass.h"
     18 #include "llvm/IR/DataLayout.h"
     19 #include "llvm/IR/IRPrintingPasses.h"
     20 #include "llvm/IR/LLVMContext.h"
     21 #include "llvm/IR/Module.h"
     22 #include "llvm/IRReader/IRReader.h"
     23 #include "llvm/IR/LegacyPassManager.h"
     24 #include "llvm/Support/CommandLine.h"
     25 #include "llvm/Support/FileSystem.h"
     26 #include "llvm/Support/ManagedStatic.h"
     27 #include "llvm/Support/PrettyStackTrace.h"
     28 #include "llvm/Support/Regex.h"
     29 #include "llvm/Support/Signals.h"
     30 #include "llvm/Support/SourceMgr.h"
     31 #include "llvm/Support/SystemUtils.h"
     32 #include "llvm/Support/ToolOutputFile.h"
     33 #include "llvm/Transforms/IPO.h"
     34 #include <memory>
     35 using namespace llvm;
     36 
     37 // InputFilename - The filename to read from.
     38 static cl::opt<std::string>
     39 InputFilename(cl::Positional, cl::desc("<input bitcode file>"),
     40               cl::init("-"), cl::value_desc("filename"));
     41 
     42 static cl::opt<std::string>
     43 OutputFilename("o", cl::desc("Specify output filename"),
     44                cl::value_desc("filename"), cl::init("-"));
     45 
     46 static cl::opt<bool>
     47 Force("f", cl::desc("Enable binary output on terminals"));
     48 
     49 static cl::opt<bool>
     50 DeleteFn("delete", cl::desc("Delete specified Globals from Module"));
     51 
     52 // ExtractFuncs - The functions to extract from the module.
     53 static cl::list<std::string>
     54 ExtractFuncs("func", cl::desc("Specify function to extract"),
     55              cl::ZeroOrMore, cl::value_desc("function"));
     56 
     57 // ExtractRegExpFuncs - The functions, matched via regular expression, to
     58 // extract from the module.
     59 static cl::list<std::string>
     60 ExtractRegExpFuncs("rfunc", cl::desc("Specify function(s) to extract using a "
     61                                      "regular expression"),
     62                    cl::ZeroOrMore, cl::value_desc("rfunction"));
     63 
     64 // ExtractAlias - The alias to extract from the module.
     65 static cl::list<std::string>
     66 ExtractAliases("alias", cl::desc("Specify alias to extract"),
     67                cl::ZeroOrMore, cl::value_desc("alias"));
     68 
     69 
     70 // ExtractRegExpAliases - The aliases, matched via regular expression, to
     71 // extract from the module.
     72 static cl::list<std::string>
     73 ExtractRegExpAliases("ralias", cl::desc("Specify alias(es) to extract using a "
     74                                         "regular expression"),
     75                      cl::ZeroOrMore, cl::value_desc("ralias"));
     76 
     77 // ExtractGlobals - The globals to extract from the module.
     78 static cl::list<std::string>
     79 ExtractGlobals("glob", cl::desc("Specify global to extract"),
     80                cl::ZeroOrMore, cl::value_desc("global"));
     81 
     82 // ExtractRegExpGlobals - The globals, matched via regular expression, to
     83 // extract from the module...
     84 static cl::list<std::string>
     85 ExtractRegExpGlobals("rglob", cl::desc("Specify global(s) to extract using a "
     86                                        "regular expression"),
     87                      cl::ZeroOrMore, cl::value_desc("rglobal"));
     88 
     89 static cl::opt<bool>
     90 OutputAssembly("S",
     91                cl::desc("Write output as LLVM assembly"), cl::Hidden);
     92 
     93 static cl::opt<bool> PreserveBitcodeUseListOrder(
     94     "preserve-bc-uselistorder",
     95     cl::desc("Preserve use-list order when writing LLVM bitcode."),
     96     cl::init(true), cl::Hidden);
     97 
     98 static cl::opt<bool> PreserveAssemblyUseListOrder(
     99     "preserve-ll-uselistorder",
    100     cl::desc("Preserve use-list order when writing LLVM assembly."),
    101     cl::init(false), cl::Hidden);
    102 
    103 int main(int argc, char **argv) {
    104   // Print a stack trace if we signal out.
    105   sys::PrintStackTraceOnErrorSignal();
    106   PrettyStackTraceProgram X(argc, argv);
    107 
    108   LLVMContext &Context = getGlobalContext();
    109   llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
    110   cl::ParseCommandLineOptions(argc, argv, "llvm extractor\n");
    111 
    112   // Use lazy loading, since we only care about selected global values.
    113   SMDiagnostic Err;
    114   std::unique_ptr<Module> M = getLazyIRFileModule(InputFilename, Err, Context);
    115 
    116   if (!M.get()) {
    117     Err.print(argv[0], errs());
    118     return 1;
    119   }
    120 
    121   // Use SetVector to avoid duplicates.
    122   SetVector<GlobalValue *> GVs;
    123 
    124   // Figure out which aliases we should extract.
    125   for (size_t i = 0, e = ExtractAliases.size(); i != e; ++i) {
    126     GlobalAlias *GA = M->getNamedAlias(ExtractAliases[i]);
    127     if (!GA) {
    128       errs() << argv[0] << ": program doesn't contain alias named '"
    129              << ExtractAliases[i] << "'!\n";
    130       return 1;
    131     }
    132     GVs.insert(GA);
    133   }
    134 
    135   // Extract aliases via regular expression matching.
    136   for (size_t i = 0, e = ExtractRegExpAliases.size(); i != e; ++i) {
    137     std::string Error;
    138     Regex RegEx(ExtractRegExpAliases[i]);
    139     if (!RegEx.isValid(Error)) {
    140       errs() << argv[0] << ": '" << ExtractRegExpAliases[i] << "' "
    141         "invalid regex: " << Error;
    142     }
    143     bool match = false;
    144     for (Module::alias_iterator GA = M->alias_begin(), E = M->alias_end();
    145          GA != E; GA++) {
    146       if (RegEx.match(GA->getName())) {
    147         GVs.insert(&*GA);
    148         match = true;
    149       }
    150     }
    151     if (!match) {
    152       errs() << argv[0] << ": program doesn't contain global named '"
    153              << ExtractRegExpAliases[i] << "'!\n";
    154       return 1;
    155     }
    156   }
    157 
    158   // Figure out which globals we should extract.
    159   for (size_t i = 0, e = ExtractGlobals.size(); i != e; ++i) {
    160     GlobalValue *GV = M->getNamedGlobal(ExtractGlobals[i]);
    161     if (!GV) {
    162       errs() << argv[0] << ": program doesn't contain global named '"
    163              << ExtractGlobals[i] << "'!\n";
    164       return 1;
    165     }
    166     GVs.insert(GV);
    167   }
    168 
    169   // Extract globals via regular expression matching.
    170   for (size_t i = 0, e = ExtractRegExpGlobals.size(); i != e; ++i) {
    171     std::string Error;
    172     Regex RegEx(ExtractRegExpGlobals[i]);
    173     if (!RegEx.isValid(Error)) {
    174       errs() << argv[0] << ": '" << ExtractRegExpGlobals[i] << "' "
    175         "invalid regex: " << Error;
    176     }
    177     bool match = false;
    178     for (auto &GV : M->globals()) {
    179       if (RegEx.match(GV.getName())) {
    180         GVs.insert(&GV);
    181         match = true;
    182       }
    183     }
    184     if (!match) {
    185       errs() << argv[0] << ": program doesn't contain global named '"
    186              << ExtractRegExpGlobals[i] << "'!\n";
    187       return 1;
    188     }
    189   }
    190 
    191   // Figure out which functions we should extract.
    192   for (size_t i = 0, e = ExtractFuncs.size(); i != e; ++i) {
    193     GlobalValue *GV = M->getFunction(ExtractFuncs[i]);
    194     if (!GV) {
    195       errs() << argv[0] << ": program doesn't contain function named '"
    196              << ExtractFuncs[i] << "'!\n";
    197       return 1;
    198     }
    199     GVs.insert(GV);
    200   }
    201   // Extract functions via regular expression matching.
    202   for (size_t i = 0, e = ExtractRegExpFuncs.size(); i != e; ++i) {
    203     std::string Error;
    204     StringRef RegExStr = ExtractRegExpFuncs[i];
    205     Regex RegEx(RegExStr);
    206     if (!RegEx.isValid(Error)) {
    207       errs() << argv[0] << ": '" << ExtractRegExpFuncs[i] << "' "
    208         "invalid regex: " << Error;
    209     }
    210     bool match = false;
    211     for (Module::iterator F = M->begin(), E = M->end(); F != E;
    212          F++) {
    213       if (RegEx.match(F->getName())) {
    214         GVs.insert(&*F);
    215         match = true;
    216       }
    217     }
    218     if (!match) {
    219       errs() << argv[0] << ": program doesn't contain global named '"
    220              << ExtractRegExpFuncs[i] << "'!\n";
    221       return 1;
    222     }
    223   }
    224 
    225   auto Materialize = [&](GlobalValue &GV) {
    226     if (std::error_code EC = GV.materialize()) {
    227       errs() << argv[0] << ": error reading input: " << EC.message() << "\n";
    228       exit(1);
    229     }
    230   };
    231 
    232   // Materialize requisite global values.
    233   if (!DeleteFn) {
    234     for (size_t i = 0, e = GVs.size(); i != e; ++i)
    235       Materialize(*GVs[i]);
    236   } else {
    237     // Deleting. Materialize every GV that's *not* in GVs.
    238     SmallPtrSet<GlobalValue *, 8> GVSet(GVs.begin(), GVs.end());
    239     for (auto &F : *M) {
    240       if (!GVSet.count(&F))
    241         Materialize(F);
    242     }
    243   }
    244 
    245   {
    246     std::vector<GlobalValue *> Gvs(GVs.begin(), GVs.end());
    247     legacy::PassManager Extract;
    248     Extract.add(createGVExtractionPass(Gvs, DeleteFn));
    249     Extract.run(*M);
    250 
    251     // Now that we have all the GVs we want, mark the module as fully
    252     // materialized.
    253     // FIXME: should the GVExtractionPass handle this?
    254     M->materializeAll();
    255   }
    256 
    257   // In addition to deleting all other functions, we also want to spiff it
    258   // up a little bit.  Do this now.
    259   legacy::PassManager Passes;
    260 
    261   if (!DeleteFn)
    262     Passes.add(createGlobalDCEPass());           // Delete unreachable globals
    263   Passes.add(createStripDeadDebugInfoPass());    // Remove dead debug info
    264   Passes.add(createStripDeadPrototypesPass());   // Remove dead func decls
    265 
    266   std::error_code EC;
    267   tool_output_file Out(OutputFilename, EC, sys::fs::F_None);
    268   if (EC) {
    269     errs() << EC.message() << '\n';
    270     return 1;
    271   }
    272 
    273   if (OutputAssembly)
    274     Passes.add(
    275         createPrintModulePass(Out.os(), "", PreserveAssemblyUseListOrder));
    276   else if (Force || !CheckBitcodeOutputToConsole(Out.os(), true))
    277     Passes.add(createBitcodeWriterPass(Out.os(), PreserveBitcodeUseListOrder));
    278 
    279   Passes.run(*M.get());
    280 
    281   // Declare success.
    282   Out.keep();
    283 
    284   return 0;
    285 }
    286