Home | History | Annotate | Download | only in docs
      1 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
      2                       "http://www.w3.org/TR/html4/strict.dtd">
      3 <html>
      4 <head>
      5   <meta http-equiv="Content-type" content="text/html;charset=UTF-8">
      6   <title>LLVM Programmer's Manual</title>
      7   <link rel="stylesheet" href="llvm.css" type="text/css">
      8 </head>
      9 <body>
     10 
     11 <h1>
     12   LLVM Programmer's Manual
     13 </h1>
     14 
     15 <ol>
     16   <li><a href="#introduction">Introduction</a></li>
     17   <li><a href="#general">General Information</a>
     18     <ul>
     19       <li><a href="#stl">The C++ Standard Template Library</a></li>
     20 <!--
     21       <li>The <tt>-time-passes</tt> option</li>
     22       <li>How to use the LLVM Makefile system</li>
     23       <li>How to write a regression test</li>
     24 
     25 --> 
     26     </ul>
     27   </li>
     28   <li><a href="#apis">Important and useful LLVM APIs</a>
     29     <ul>
     30       <li><a href="#isa">The <tt>isa&lt;&gt;</tt>, <tt>cast&lt;&gt;</tt>
     31 and <tt>dyn_cast&lt;&gt;</tt> templates</a> </li>
     32       <li><a href="#string_apis">Passing strings (the <tt>StringRef</tt>
     33 and <tt>Twine</tt> classes)</a>
     34         <ul>
     35           <li><a href="#StringRef">The <tt>StringRef</tt> class</a> </li>
     36           <li><a href="#Twine">The <tt>Twine</tt> class</a> </li>
     37         </ul>
     38       </li>
     39       <li><a href="#DEBUG">The <tt>DEBUG()</tt> macro and <tt>-debug</tt>
     40 option</a>
     41         <ul>
     42           <li><a href="#DEBUG_TYPE">Fine grained debug info with <tt>DEBUG_TYPE</tt>
     43 and the <tt>-debug-only</tt> option</a> </li>
     44         </ul>
     45       </li>
     46       <li><a href="#Statistic">The <tt>Statistic</tt> class &amp; <tt>-stats</tt>
     47 option</a></li>
     48 <!--
     49       <li>The <tt>InstVisitor</tt> template
     50       <li>The general graph API
     51 --> 
     52       <li><a href="#ViewGraph">Viewing graphs while debugging code</a></li>
     53     </ul>
     54   </li>
     55   <li><a href="#datastructure">Picking the Right Data Structure for a Task</a>
     56     <ul>
     57     <li><a href="#ds_sequential">Sequential Containers (std::vector, std::list, etc)</a>
     58     <ul>
     59       <li><a href="#dss_arrayref">llvm/ADT/ArrayRef.h</a></li>
     60       <li><a href="#dss_fixedarrays">Fixed Size Arrays</a></li>
     61       <li><a href="#dss_heaparrays">Heap Allocated Arrays</a></li>
     62       <li><a href="#dss_tinyptrvector">"llvm/ADT/TinyPtrVector.h"</a></li>
     63       <li><a href="#dss_smallvector">"llvm/ADT/SmallVector.h"</a></li>
     64       <li><a href="#dss_vector">&lt;vector&gt;</a></li>
     65       <li><a href="#dss_deque">&lt;deque&gt;</a></li>
     66       <li><a href="#dss_list">&lt;list&gt;</a></li>
     67       <li><a href="#dss_ilist">llvm/ADT/ilist.h</a></li>
     68       <li><a href="#dss_packedvector">llvm/ADT/PackedVector.h</a></li>
     69       <li><a href="#dss_other">Other Sequential Container Options</a></li>
     70     </ul></li>
     71     <li><a href="#ds_string">String-like containers</a>
     72     <ul>
     73       <li><a href="#dss_stringref">llvm/ADT/StringRef.h</a></li>
     74       <li><a href="#dss_twine">llvm/ADT/Twine.h</a></li>
     75       <li><a href="#dss_smallstring">llvm/ADT/SmallString.h</a></li>
     76       <li><a href="#dss_stdstring">std::string</a></li>
     77     </ul></li>
     78     <li><a href="#ds_set">Set-Like Containers (std::set, SmallSet, SetVector, etc)</a>
     79     <ul>
     80       <li><a href="#dss_sortedvectorset">A sorted 'vector'</a></li>
     81       <li><a href="#dss_smallset">"llvm/ADT/SmallSet.h"</a></li>
     82       <li><a href="#dss_smallptrset">"llvm/ADT/SmallPtrSet.h"</a></li>
     83       <li><a href="#dss_denseset">"llvm/ADT/DenseSet.h"</a></li>
     84       <li><a href="#dss_FoldingSet">"llvm/ADT/FoldingSet.h"</a></li>
     85       <li><a href="#dss_set">&lt;set&gt;</a></li>
     86       <li><a href="#dss_setvector">"llvm/ADT/SetVector.h"</a></li>
     87       <li><a href="#dss_uniquevector">"llvm/ADT/UniqueVector.h"</a></li>
     88       <li><a href="#dss_otherset">Other Set-Like Container Options</a></li>
     89     </ul></li>
     90     <li><a href="#ds_map">Map-Like Containers (std::map, DenseMap, etc)</a>
     91     <ul>
     92       <li><a href="#dss_sortedvectormap">A sorted 'vector'</a></li>
     93       <li><a href="#dss_stringmap">"llvm/ADT/StringMap.h"</a></li>
     94       <li><a href="#dss_indexedmap">"llvm/ADT/IndexedMap.h"</a></li>
     95       <li><a href="#dss_densemap">"llvm/ADT/DenseMap.h"</a></li>
     96       <li><a href="#dss_valuemap">"llvm/ADT/ValueMap.h"</a></li>
     97       <li><a href="#dss_intervalmap">"llvm/ADT/IntervalMap.h"</a></li>
     98       <li><a href="#dss_map">&lt;map&gt;</a></li>
     99       <li><a href="#dss_inteqclasses">"llvm/ADT/IntEqClasses.h"</a></li>
    100       <li><a href="#dss_othermap">Other Map-Like Container Options</a></li>
    101     </ul></li>
    102     <li><a href="#ds_bit">BitVector-like containers</a>
    103     <ul>
    104       <li><a href="#dss_bitvector">A dense bitvector</a></li>
    105       <li><a href="#dss_smallbitvector">A "small" dense bitvector</a></li>
    106       <li><a href="#dss_sparsebitvector">A sparse bitvector</a></li>
    107     </ul></li>
    108   </ul>
    109   </li>
    110   <li><a href="#common">Helpful Hints for Common Operations</a>
    111     <ul>
    112       <li><a href="#inspection">Basic Inspection and Traversal Routines</a>
    113         <ul>
    114           <li><a href="#iterate_function">Iterating over the <tt>BasicBlock</tt>s
    115 in a <tt>Function</tt></a> </li>
    116           <li><a href="#iterate_basicblock">Iterating over the <tt>Instruction</tt>s
    117 in a <tt>BasicBlock</tt></a> </li>
    118           <li><a href="#iterate_institer">Iterating over the <tt>Instruction</tt>s
    119 in a <tt>Function</tt></a> </li>
    120           <li><a href="#iterate_convert">Turning an iterator into a
    121 class pointer</a> </li>
    122           <li><a href="#iterate_complex">Finding call sites: a more
    123 complex example</a> </li>
    124           <li><a href="#calls_and_invokes">Treating calls and invokes
    125 the same way</a> </li>
    126           <li><a href="#iterate_chains">Iterating over def-use &amp;
    127 use-def chains</a> </li>
    128           <li><a href="#iterate_preds">Iterating over predecessors &amp;
    129 successors of blocks</a></li>
    130         </ul>
    131       </li>
    132       <li><a href="#simplechanges">Making simple changes</a>
    133         <ul>
    134           <li><a href="#schanges_creating">Creating and inserting new
    135 		 <tt>Instruction</tt>s</a> </li>
    136           <li><a href="#schanges_deleting">Deleting 		 <tt>Instruction</tt>s</a> </li>
    137           <li><a href="#schanges_replacing">Replacing an 		 <tt>Instruction</tt>
    138 with another <tt>Value</tt></a> </li>
    139           <li><a href="#schanges_deletingGV">Deleting <tt>GlobalVariable</tt>s</a> </li>  
    140         </ul>
    141       </li>
    142       <li><a href="#create_types">How to Create Types</a></li>
    143 <!--
    144     <li>Working with the Control Flow Graph
    145     <ul>
    146       <li>Accessing predecessors and successors of a <tt>BasicBlock</tt>
    147       <li>
    148       <li>
    149     </ul>
    150 --> 
    151     </ul>
    152   </li>
    153 
    154   <li><a href="#threading">Threads and LLVM</a>
    155   <ul>
    156     <li><a href="#startmultithreaded">Entering and Exiting Multithreaded Mode
    157         </a></li>
    158     <li><a href="#shutdown">Ending execution with <tt>llvm_shutdown()</tt></a></li>
    159     <li><a href="#managedstatic">Lazy initialization with <tt>ManagedStatic</tt></a></li>
    160     <li><a href="#llvmcontext">Achieving Isolation with <tt>LLVMContext</tt></a></li>
    161     <li><a href="#jitthreading">Threads and the JIT</a></li>
    162   </ul>
    163   </li>
    164 
    165   <li><a href="#advanced">Advanced Topics</a>
    166   <ul>
    167 
    168   <li><a href="#SymbolTable">The <tt>ValueSymbolTable</tt> class</a></li>
    169   <li><a href="#UserLayout">The <tt>User</tt> and owned <tt>Use</tt> classes' memory layout</a></li>
    170   </ul></li>
    171 
    172   <li><a href="#coreclasses">The Core LLVM Class Hierarchy Reference</a>
    173     <ul>
    174       <li><a href="#Type">The <tt>Type</tt> class</a> </li>
    175       <li><a href="#Module">The <tt>Module</tt> class</a></li>
    176       <li><a href="#Value">The <tt>Value</tt> class</a>
    177       <ul>
    178         <li><a href="#User">The <tt>User</tt> class</a>
    179         <ul>
    180           <li><a href="#Instruction">The <tt>Instruction</tt> class</a></li>
    181           <li><a href="#Constant">The <tt>Constant</tt> class</a>
    182           <ul>
    183             <li><a href="#GlobalValue">The <tt>GlobalValue</tt> class</a>
    184             <ul>
    185               <li><a href="#Function">The <tt>Function</tt> class</a></li>
    186               <li><a href="#GlobalVariable">The <tt>GlobalVariable</tt> class</a></li>
    187             </ul>
    188             </li>
    189           </ul>
    190           </li>
    191         </ul>
    192         </li>
    193         <li><a href="#BasicBlock">The <tt>BasicBlock</tt> class</a></li>
    194         <li><a href="#Argument">The <tt>Argument</tt> class</a></li>
    195       </ul>
    196       </li>
    197     </ul>
    198   </li>
    199 </ol>
    200 
    201 <div class="doc_author">    
    202   <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>,
    203                 <a href="mailto:dhurjati@cs.uiuc.edu">Dinakar Dhurjati</a>,
    204                 <a href="mailto:ggreif@gmail.com">Gabor Greif</a>,
    205                 <a href="mailto:jstanley@cs.uiuc.edu">Joel Stanley</a>,
    206                 <a href="mailto:rspencer@x10sys.com">Reid Spencer</a> and
    207                 <a href="mailto:owen@apple.com">Owen Anderson</a></p>
    208 </div>
    209 
    210 <!-- *********************************************************************** -->
    211 <h2>
    212   <a name="introduction">Introduction </a>
    213 </h2>
    214 <!-- *********************************************************************** -->
    215 
    216 <div>
    217 
    218 <p>This document is meant to highlight some of the important classes and
    219 interfaces available in the LLVM source-base.  This manual is not
    220 intended to explain what LLVM is, how it works, and what LLVM code looks
    221 like.  It assumes that you know the basics of LLVM and are interested
    222 in writing transformations or otherwise analyzing or manipulating the
    223 code.</p>
    224 
    225 <p>This document should get you oriented so that you can find your
    226 way in the continuously growing source code that makes up the LLVM
    227 infrastructure. Note that this manual is not intended to serve as a
    228 replacement for reading the source code, so if you think there should be
    229 a method in one of these classes to do something, but it's not listed,
    230 check the source.  Links to the <a href="/doxygen/">doxygen</a> sources
    231 are provided to make this as easy as possible.</p>
    232 
    233 <p>The first section of this document describes general information that is
    234 useful to know when working in the LLVM infrastructure, and the second describes
    235 the Core LLVM classes.  In the future this manual will be extended with
    236 information describing how to use extension libraries, such as dominator
    237 information, CFG traversal routines, and useful utilities like the <tt><a
    238 href="/doxygen/InstVisitor_8h-source.html">InstVisitor</a></tt> template.</p>
    239 
    240 </div>
    241 
    242 <!-- *********************************************************************** -->
    243 <h2>
    244   <a name="general">General Information</a>
    245 </h2>
    246 <!-- *********************************************************************** -->
    247 
    248 <div>
    249 
    250 <p>This section contains general information that is useful if you are working
    251 in the LLVM source-base, but that isn't specific to any particular API.</p>
    252 
    253 <!-- ======================================================================= -->
    254 <h3>
    255   <a name="stl">The C++ Standard Template Library</a>
    256 </h3>
    257 
    258 <div>
    259 
    260 <p>LLVM makes heavy use of the C++ Standard Template Library (STL),
    261 perhaps much more than you are used to, or have seen before.  Because of
    262 this, you might want to do a little background reading in the
    263 techniques used and capabilities of the library.  There are many good
    264 pages that discuss the STL, and several books on the subject that you
    265 can get, so it will not be discussed in this document.</p>
    266 
    267 <p>Here are some useful links:</p>
    268 
    269 <ol>
    270 
    271 <li><a href="http://www.dinkumware.com/manuals/#Standard C++ Library">Dinkumware
    272 C++ Library reference</a> - an excellent reference for the STL and other parts
    273 of the standard C++ library.</li>
    274 
    275 <li><a href="http://www.tempest-sw.com/cpp/">C++ In a Nutshell</a> - This is an
    276 O'Reilly book in the making.  It has a decent Standard Library
    277 Reference that rivals Dinkumware's, and is unfortunately no longer free since the
    278 book has been published.</li>
    279 
    280 <li><a href="http://www.parashift.com/c++-faq-lite/">C++ Frequently Asked
    281 Questions</a></li>
    282 
    283 <li><a href="http://www.sgi.com/tech/stl/">SGI's STL Programmer's Guide</a> -
    284 Contains a useful <a
    285 href="http://www.sgi.com/tech/stl/stl_introduction.html">Introduction to the
    286 STL</a>.</li>
    287 
    288 <li><a href="http://www.research.att.com/%7Ebs/C++.html">Bjarne Stroustrup's C++
    289 Page</a></li>
    290 
    291 <li><a href="http://64.78.49.204/">
    292 Bruce Eckel's Thinking in C++, 2nd ed. Volume 2 Revision 4.0 (even better, get
    293 the book).</a></li>
    294 
    295 </ol>
    296   
    297 <p>You are also encouraged to take a look at the <a
    298 href="CodingStandards.html">LLVM Coding Standards</a> guide which focuses on how
    299 to write maintainable code more than where to put your curly braces.</p>
    300 
    301 </div>
    302 
    303 <!-- ======================================================================= -->
    304 <h3>
    305   <a name="reference">Other useful references</a>
    306 </h3>
    307 
    308 <div>
    309 
    310 <ol>
    311 <li><a href="http://www.fortran-2000.com/ArnaudRecipes/sharedlib.html">Using
    312 static and shared libraries across platforms</a></li>
    313 </ol>
    314 
    315 </div>
    316 
    317 </div>
    318 
    319 <!-- *********************************************************************** -->
    320 <h2>
    321   <a name="apis">Important and useful LLVM APIs</a>
    322 </h2>
    323 <!-- *********************************************************************** -->
    324 
    325 <div>
    326 
    327 <p>Here we highlight some LLVM APIs that are generally useful and good to
    328 know about when writing transformations.</p>
    329 
    330 <!-- ======================================================================= -->
    331 <h3>
    332   <a name="isa">The <tt>isa&lt;&gt;</tt>, <tt>cast&lt;&gt;</tt> and
    333   <tt>dyn_cast&lt;&gt;</tt> templates</a>
    334 </h3>
    335 
    336 <div>
    337 
    338 <p>The LLVM source-base makes extensive use of a custom form of RTTI.
    339 These templates have many similarities to the C++ <tt>dynamic_cast&lt;&gt;</tt>
    340 operator, but they don't have some drawbacks (primarily stemming from
    341 the fact that <tt>dynamic_cast&lt;&gt;</tt> only works on classes that
    342 have a v-table). Because they are used so often, you must know what they
    343 do and how they work. All of these templates are defined in the <a
    344  href="/doxygen/Casting_8h-source.html"><tt>llvm/Support/Casting.h</tt></a>
    345 file (note that you very rarely have to include this file directly).</p>
    346 
    347 <dl>
    348   <dt><tt>isa&lt;&gt;</tt>: </dt>
    349 
    350   <dd><p>The <tt>isa&lt;&gt;</tt> operator works exactly like the Java
    351   "<tt>instanceof</tt>" operator.  It returns true or false depending on whether
    352   a reference or pointer points to an instance of the specified class.  This can
    353   be very useful for constraint checking of various sorts (example below).</p>
    354   </dd>
    355 
    356   <dt><tt>cast&lt;&gt;</tt>: </dt>
    357 
    358   <dd><p>The <tt>cast&lt;&gt;</tt> operator is a "checked cast" operation. It
    359   converts a pointer or reference from a base class to a derived class, causing
    360   an assertion failure if it is not really an instance of the right type.  This
    361   should be used in cases where you have some information that makes you believe
    362   that something is of the right type.  An example of the <tt>isa&lt;&gt;</tt>
    363   and <tt>cast&lt;&gt;</tt> template is:</p>
    364 
    365 <div class="doc_code">
    366 <pre>
    367 static bool isLoopInvariant(const <a href="#Value">Value</a> *V, const Loop *L) {
    368   if (isa&lt;<a href="#Constant">Constant</a>&gt;(V) || isa&lt;<a href="#Argument">Argument</a>&gt;(V) || isa&lt;<a href="#GlobalValue">GlobalValue</a>&gt;(V))
    369     return true;
    370 
    371   // <i>Otherwise, it must be an instruction...</i>
    372   return !L-&gt;contains(cast&lt;<a href="#Instruction">Instruction</a>&gt;(V)-&gt;getParent());
    373 }
    374 </pre>
    375 </div>
    376 
    377   <p>Note that you should <b>not</b> use an <tt>isa&lt;&gt;</tt> test followed
    378   by a <tt>cast&lt;&gt;</tt>; for that, use the <tt>dyn_cast&lt;&gt;</tt>
    379   operator.</p>
    380 
    381   </dd>
    382 
    383   <dt><tt>dyn_cast&lt;&gt;</tt>:</dt>
    384 
    385   <dd><p>The <tt>dyn_cast&lt;&gt;</tt> operator is a "checking cast" operation.
    386   It checks to see if the operand is of the specified type, and if so, returns a
    387   pointer to it (this operator does not work with references). If the operand is
    388   not of the correct type, a null pointer is returned.  Thus, this works very
    389   much like the <tt>dynamic_cast&lt;&gt;</tt> operator in C++, and should be
    390   used in the same circumstances.  Typically, the <tt>dyn_cast&lt;&gt;</tt>
    391   operator is used in an <tt>if</tt> statement or some other flow control
    392   statement like this:</p>
    393 
    394 <div class="doc_code">
    395 <pre>
    396 if (<a href="#AllocationInst">AllocationInst</a> *AI = dyn_cast&lt;<a href="#AllocationInst">AllocationInst</a>&gt;(Val)) {
    397   // <i>...</i>
    398 }
    399 </pre>
    400 </div>
    401    
    402   <p>This form of the <tt>if</tt> statement effectively combines together a call
    403   to <tt>isa&lt;&gt;</tt> and a call to <tt>cast&lt;&gt;</tt> into one
    404   statement, which is very convenient.</p>
    405 
    406   <p>Note that the <tt>dyn_cast&lt;&gt;</tt> operator, like C++'s
    407   <tt>dynamic_cast&lt;&gt;</tt> or Java's <tt>instanceof</tt> operator, can be
    408   abused.  In particular, you should not use big chained <tt>if/then/else</tt>
    409   blocks to check for lots of different variants of classes.  If you find
    410   yourself wanting to do this, it is much cleaner and more efficient to use the
    411   <tt>InstVisitor</tt> class to dispatch over the instruction type directly.</p>
    412 
    413   </dd>
    414 
    415   <dt><tt>cast_or_null&lt;&gt;</tt>: </dt>
    416   
    417   <dd><p>The <tt>cast_or_null&lt;&gt;</tt> operator works just like the
    418   <tt>cast&lt;&gt;</tt> operator, except that it allows for a null pointer as an
    419   argument (which it then propagates).  This can sometimes be useful, allowing
    420   you to combine several null checks into one.</p></dd>
    421 
    422   <dt><tt>dyn_cast_or_null&lt;&gt;</tt>: </dt>
    423 
    424   <dd><p>The <tt>dyn_cast_or_null&lt;&gt;</tt> operator works just like the
    425   <tt>dyn_cast&lt;&gt;</tt> operator, except that it allows for a null pointer
    426   as an argument (which it then propagates).  This can sometimes be useful,
    427   allowing you to combine several null checks into one.</p></dd>
    428 
    429 </dl>
    430 
    431 <p>These five templates can be used with any classes, whether they have a
    432 v-table or not.  To add support for these templates, you simply need to add
    433 <tt>classof</tt> static methods to the class you are interested in casting
    434 to. Describing this is currently outside the scope of this document, but there
    435 are lots of examples in the LLVM source base.</p>
    436 
    437 </div>
    438 
    439 
    440 <!-- ======================================================================= -->
    441 <h3>
    442   <a name="string_apis">Passing strings (the <tt>StringRef</tt>
    443 and <tt>Twine</tt> classes)</a>
    444 </h3>
    445 
    446 <div>
    447 
    448 <p>Although LLVM generally does not do much string manipulation, we do have
    449 several important APIs which take strings.  Two important examples are the
    450 Value class -- which has names for instructions, functions, etc. -- and the
    451 StringMap class which is used extensively in LLVM and Clang.</p>
    452 
    453 <p>These are generic classes, and they need to be able to accept strings which
    454 may have embedded null characters.  Therefore, they cannot simply take
    455 a <tt>const char *</tt>, and taking a <tt>const std::string&amp;</tt> requires
    456 clients to perform a heap allocation which is usually unnecessary.  Instead,
    457 many LLVM APIs use a <tt>StringRef</tt> or a <tt>const Twine&amp;</tt> for
    458 passing strings efficiently.</p>
    459 
    460 <!-- _______________________________________________________________________ -->
    461 <h4>
    462   <a name="StringRef">The <tt>StringRef</tt> class</a>
    463 </h4>
    464 
    465 <div>
    466 
    467 <p>The <tt>StringRef</tt> data type represents a reference to a constant string
    468 (a character array and a length) and supports the common operations available
    469 on <tt>std::string</tt>, but does not require heap allocation.</p>
    470 
    471 <p>It can be implicitly constructed using a C style null-terminated string,
    472 an <tt>std::string</tt>, or explicitly with a character pointer and length.
    473 For example, the <tt>StringMap</tt> find function is declared as:</p>
    474 
    475 <pre class="doc_code">
    476   iterator find(StringRef Key);
    477 </pre>
    478 
    479 <p>and clients can call it using any one of:</p>
    480 
    481 <pre class="doc_code">
    482   Map.find("foo");                 <i>// Lookup "foo"</i>
    483   Map.find(std::string("bar"));    <i>// Lookup "bar"</i>
    484   Map.find(StringRef("\0baz", 4)); <i>// Lookup "\0baz"</i>
    485 </pre>
    486 
    487 <p>Similarly, APIs which need to return a string may return a <tt>StringRef</tt>
    488 instance, which can be used directly or converted to an <tt>std::string</tt>
    489 using the <tt>str</tt> member function.  See 
    490 "<tt><a href="/doxygen/classllvm_1_1StringRef_8h-source.html">llvm/ADT/StringRef.h</a></tt>"
    491 for more information.</p>
    492 
    493 <p>You should rarely use the <tt>StringRef</tt> class directly: because it contains
    494 pointers to external memory, it is not generally safe to store an instance of the
    495 class (unless you know that the external storage will not be freed). StringRef is
    496 small and pervasive enough in LLVM that it should always be passed by value.</p>
    497 
    498 </div>
    499 
    500 <!-- _______________________________________________________________________ -->
    501 <h4>
    502   <a name="Twine">The <tt>Twine</tt> class</a>
    503 </h4>
    504 
    505 <div>
    506 
    507 <p>The <tt>Twine</tt> class is an efficient way for APIs to accept concatenated
    508 strings.  For example, a common LLVM paradigm is to name one instruction based on
    509 the name of another instruction with a suffix:</p>
    510 
    511 <div class="doc_code">
    512 <pre>
    513     New = CmpInst::Create(<i>...</i>, SO-&gt;getName() + ".cmp");
    514 </pre>
    515 </div>
    516 
    517 <p>The <tt>Twine</tt> class is effectively a
    518 lightweight <a href="http://en.wikipedia.org/wiki/Rope_(computer_science)">rope</a>
    519 which points to temporary (stack allocated) objects.  Twines can be implicitly
    520 constructed as the result of the plus operator applied to strings (i.e., a C
    521 string, an <tt>std::string</tt>, or a <tt>StringRef</tt>).  The twine delays the
    522 actual concatenation of strings until it is actually required, at which point
    523 it can be efficiently rendered directly into a character array.  This avoids
    524 unnecessary heap allocation involved in constructing the temporary results of
    525 string concatenation. See
    526 "<tt><a href="/doxygen/classllvm_1_1Twine_8h-source.html">llvm/ADT/Twine.h</a></tt>"
    527 for more information.</p>
    528 
    529 <p>As with a <tt>StringRef</tt>, <tt>Twine</tt> objects point to external memory
    530 and should almost never be stored or mentioned directly.  They are intended
    531 solely for use when defining a function which should be able to efficiently
    532 accept concatenated strings.</p>
    533 
    534 </div>
    535 
    536 </div>
    537 
    538 <!-- ======================================================================= -->
    539 <h3>
    540   <a name="DEBUG">The <tt>DEBUG()</tt> macro and <tt>-debug</tt> option</a>
    541 </h3>
    542 
    543 <div>
    544 
    545 <p>Often when working on your pass you will put a bunch of debugging printouts
    546 and other code into your pass.  After you get it working, you want to remove
    547 it, but you may need it again in the future (to work out new bugs that you run
    548 across).</p>
    549 
    550 <p> Naturally, because of this, you don't want to delete the debug printouts,
    551 but you don't want them to always be noisy.  A standard compromise is to comment
    552 them out, allowing you to enable them if you need them in the future.</p>
    553 
    554 <p>The "<tt><a href="/doxygen/Debug_8h-source.html">llvm/Support/Debug.h</a></tt>"
    555 file provides a macro named <tt>DEBUG()</tt> that is a much nicer solution to
    556 this problem.  Basically, you can put arbitrary code into the argument of the
    557 <tt>DEBUG</tt> macro, and it is only executed if '<tt>opt</tt>' (or any other
    558 tool) is run with the '<tt>-debug</tt>' command line argument:</p>
    559 
    560 <div class="doc_code">
    561 <pre>
    562 DEBUG(errs() &lt;&lt; "I am here!\n");
    563 </pre>
    564 </div>
    565 
    566 <p>Then you can run your pass like this:</p>
    567 
    568 <div class="doc_code">
    569 <pre>
    570 $ opt &lt; a.bc &gt; /dev/null -mypass
    571 <i>&lt;no output&gt;</i>
    572 $ opt &lt; a.bc &gt; /dev/null -mypass -debug
    573 I am here!
    574 </pre>
    575 </div>
    576 
    577 <p>Using the <tt>DEBUG()</tt> macro instead of a home-brewed solution allows you
    578 to not have to create "yet another" command line option for the debug output for
    579 your pass.  Note that <tt>DEBUG()</tt> macros are disabled for optimized builds,
    580 so they do not cause a performance impact at all (for the same reason, they
    581 should also not contain side-effects!).</p>
    582 
    583 <p>One additional nice thing about the <tt>DEBUG()</tt> macro is that you can
    584 enable or disable it directly in gdb.  Just use "<tt>set DebugFlag=0</tt>" or
    585 "<tt>set DebugFlag=1</tt>" from gdb if the program is running.  If the
    586 program hasn't been started yet, you can always just run it with
    587 <tt>-debug</tt>.</p>
    588 
    589 <!-- _______________________________________________________________________ -->
    590 <h4>
    591   <a name="DEBUG_TYPE">Fine grained debug info with <tt>DEBUG_TYPE</tt> and
    592   the <tt>-debug-only</tt> option</a>
    593 </h4>
    594 
    595 <div>
    596 
    597 <p>Sometimes you may find yourself in a situation where enabling <tt>-debug</tt>
    598 just turns on <b>too much</b> information (such as when working on the code
    599 generator).  If you want to enable debug information with more fine-grained
    600 control, you define the <tt>DEBUG_TYPE</tt> macro and use the
    601 <tt>-debug-only</tt> option as follows:</p>
    602 
    603 <div class="doc_code">
    604 <pre>
    605 #undef  DEBUG_TYPE
    606 DEBUG(errs() &lt;&lt; "No debug type\n");
    607 #define DEBUG_TYPE "foo"
    608 DEBUG(errs() &lt;&lt; "'foo' debug type\n");
    609 #undef  DEBUG_TYPE
    610 #define DEBUG_TYPE "bar"
    611 DEBUG(errs() &lt;&lt; "'bar' debug type\n");
    612 #undef  DEBUG_TYPE
    613 #define DEBUG_TYPE ""
    614 DEBUG(errs() &lt;&lt; "No debug type (2)\n");
    615 </pre>
    616 </div>
    617 
    618 <p>Then you can run your pass like this:</p>
    619 
    620 <div class="doc_code">
    621 <pre>
    622 $ opt &lt; a.bc &gt; /dev/null -mypass
    623 <i>&lt;no output&gt;</i>
    624 $ opt &lt; a.bc &gt; /dev/null -mypass -debug
    625 No debug type
    626 'foo' debug type
    627 'bar' debug type
    628 No debug type (2)
    629 $ opt &lt; a.bc &gt; /dev/null -mypass -debug-only=foo
    630 'foo' debug type
    631 $ opt &lt; a.bc &gt; /dev/null -mypass -debug-only=bar
    632 'bar' debug type
    633 </pre>
    634 </div>
    635 
    636 <p>Of course, in practice, you should only set <tt>DEBUG_TYPE</tt> at the top of
    637 a file, to specify the debug type for the entire module (if you do this before
    638 you <tt>#include "llvm/Support/Debug.h"</tt>, you don't have to insert the ugly
    639 <tt>#undef</tt>'s).  Also, you should use names more meaningful than "foo" and
    640 "bar", because there is no system in place to ensure that names do not
    641 conflict. If two different modules use the same string, they will all be turned
    642 on when the name is specified. This allows, for example, all debug information
    643 for instruction scheduling to be enabled with <tt>-debug-only=InstrSched</tt>,
    644 even if the source lives in multiple files.</p>
    645 
    646 <p>The <tt>DEBUG_WITH_TYPE</tt> macro is also available for situations where you
    647 would like to set <tt>DEBUG_TYPE</tt>, but only for one specific <tt>DEBUG</tt>
    648 statement. It takes an additional first parameter, which is the type to use. For
    649 example, the preceding example could be written as:</p>
    650 
    651 
    652 <div class="doc_code">
    653 <pre>
    654 DEBUG_WITH_TYPE("", errs() &lt;&lt; "No debug type\n");
    655 DEBUG_WITH_TYPE("foo", errs() &lt;&lt; "'foo' debug type\n");
    656 DEBUG_WITH_TYPE("bar", errs() &lt;&lt; "'bar' debug type\n");
    657 DEBUG_WITH_TYPE("", errs() &lt;&lt; "No debug type (2)\n");
    658 </pre>
    659 </div>
    660 
    661 </div>
    662 
    663 </div>
    664 
    665 <!-- ======================================================================= -->
    666 <h3>
    667   <a name="Statistic">The <tt>Statistic</tt> class &amp; <tt>-stats</tt>
    668   option</a>
    669 </h3>
    670 
    671 <div>
    672 
    673 <p>The "<tt><a
    674 href="/doxygen/Statistic_8h-source.html">llvm/ADT/Statistic.h</a></tt>" file
    675 provides a class named <tt>Statistic</tt> that is used as a unified way to
    676 keep track of what the LLVM compiler is doing and how effective various
    677 optimizations are.  It is useful to see what optimizations are contributing to
    678 making a particular program run faster.</p>
    679 
    680 <p>Often you may run your pass on some big program, and you're interested to see
    681 how many times it makes a certain transformation.  Although you can do this with
    682 hand inspection, or some ad-hoc method, this is a real pain and not very useful
    683 for big programs.  Using the <tt>Statistic</tt> class makes it very easy to
    684 keep track of this information, and the calculated information is presented in a
    685 uniform manner with the rest of the passes being executed.</p>
    686 
    687 <p>There are many examples of <tt>Statistic</tt> uses, but the basics of using
    688 it are as follows:</p>
    689 
    690 <ol>
    691     <li><p>Define your statistic like this:</p>
    692 
    693 <div class="doc_code">
    694 <pre>
    695 #define <a href="#DEBUG_TYPE">DEBUG_TYPE</a> "mypassname"   <i>// This goes before any #includes.</i>
    696 STATISTIC(NumXForms, "The # of times I did stuff");
    697 </pre>
    698 </div>
    699 
    700   <p>The <tt>STATISTIC</tt> macro defines a static variable, whose name is
    701     specified by the first argument.  The pass name is taken from the DEBUG_TYPE
    702     macro, and the description is taken from the second argument.  The variable
    703     defined ("NumXForms" in this case) acts like an unsigned integer.</p></li>
    704 
    705     <li><p>Whenever you make a transformation, bump the counter:</p>
    706 
    707 <div class="doc_code">
    708 <pre>
    709 ++NumXForms;   // <i>I did stuff!</i>
    710 </pre>
    711 </div>
    712 
    713     </li>
    714   </ol>
    715 
    716   <p>That's all you have to do.  To get '<tt>opt</tt>' to print out the
    717   statistics gathered, use the '<tt>-stats</tt>' option:</p>
    718 
    719 <div class="doc_code">
    720 <pre>
    721 $ opt -stats -mypassname &lt; program.bc &gt; /dev/null
    722 <i>... statistics output ...</i>
    723 </pre>
    724 </div>
    725 
    726   <p> When running <tt>opt</tt> on a C file from the SPEC benchmark
    727 suite, it gives a report that looks like this:</p>
    728 
    729 <div class="doc_code">
    730 <pre>
    731    7646 bitcodewriter   - Number of normal instructions
    732     725 bitcodewriter   - Number of oversized instructions
    733  129996 bitcodewriter   - Number of bitcode bytes written
    734    2817 raise           - Number of insts DCEd or constprop'd
    735    3213 raise           - Number of cast-of-self removed
    736    5046 raise           - Number of expression trees converted
    737      75 raise           - Number of other getelementptr's formed
    738     138 raise           - Number of load/store peepholes
    739      42 deadtypeelim    - Number of unused typenames removed from symtab
    740     392 funcresolve     - Number of varargs functions resolved
    741      27 globaldce       - Number of global variables removed
    742       2 adce            - Number of basic blocks removed
    743     134 cee             - Number of branches revectored
    744      49 cee             - Number of setcc instruction eliminated
    745     532 gcse            - Number of loads removed
    746    2919 gcse            - Number of instructions removed
    747      86 indvars         - Number of canonical indvars added
    748      87 indvars         - Number of aux indvars removed
    749      25 instcombine     - Number of dead inst eliminate
    750     434 instcombine     - Number of insts combined
    751     248 licm            - Number of load insts hoisted
    752    1298 licm            - Number of insts hoisted to a loop pre-header
    753       3 licm            - Number of insts hoisted to multiple loop preds (bad, no loop pre-header)
    754      75 mem2reg         - Number of alloca's promoted
    755    1444 cfgsimplify     - Number of blocks simplified
    756 </pre>
    757 </div>
    758 
    759 <p>Obviously, with so many optimizations, having a unified framework for this
    760 stuff is very nice.  Making your pass fit well into the framework makes it more
    761 maintainable and useful.</p>
    762 
    763 </div>
    764 
    765 <!-- ======================================================================= -->
    766 <h3>
    767   <a name="ViewGraph">Viewing graphs while debugging code</a>
    768 </h3>
    769 
    770 <div>
    771 
    772 <p>Several of the important data structures in LLVM are graphs: for example
    773 CFGs made out of LLVM <a href="#BasicBlock">BasicBlock</a>s, CFGs made out of
    774 LLVM <a href="CodeGenerator.html#machinebasicblock">MachineBasicBlock</a>s, and
    775 <a href="CodeGenerator.html#selectiondag_intro">Instruction Selection
    776 DAGs</a>.  In many cases, while debugging various parts of the compiler, it is
    777 nice to instantly visualize these graphs.</p>
    778 
    779 <p>LLVM provides several callbacks that are available in a debug build to do
    780 exactly that.  If you call the <tt>Function::viewCFG()</tt> method, for example,
    781 the current LLVM tool will pop up a window containing the CFG for the function
    782 where each basic block is a node in the graph, and each node contains the
    783 instructions in the block.  Similarly, there also exists 
    784 <tt>Function::viewCFGOnly()</tt> (does not include the instructions), the
    785 <tt>MachineFunction::viewCFG()</tt> and <tt>MachineFunction::viewCFGOnly()</tt>,
    786 and the <tt>SelectionDAG::viewGraph()</tt> methods.  Within GDB, for example,
    787 you can usually use something like <tt>call DAG.viewGraph()</tt> to pop
    788 up a window.  Alternatively, you can sprinkle calls to these functions in your
    789 code in places you want to debug.</p>
    790 
    791 <p>Getting this to work requires a small amount of configuration.  On Unix
    792 systems with X11, install the <a href="http://www.graphviz.org">graphviz</a>
    793 toolkit, and make sure 'dot' and 'gv' are in your path.  If you are running on
    794 Mac OS/X, download and install the Mac OS/X <a 
    795 href="http://www.pixelglow.com/graphviz/">Graphviz program</a>, and add
    796 <tt>/Applications/Graphviz.app/Contents/MacOS/</tt> (or wherever you install
    797 it) to your path.  Once your system and path are set up, rerun the LLVM
    798 configure script and rebuild LLVM to enable this functionality.</p>
    799 
    800 <p><tt>SelectionDAG</tt> has been extended to make it easier to locate
    801 <i>interesting</i> nodes in large complex graphs.  From gdb, if you
    802 <tt>call DAG.setGraphColor(<i>node</i>, "<i>color</i>")</tt>, then the
    803 next <tt>call DAG.viewGraph()</tt> would highlight the node in the
    804 specified color (choices of colors can be found at <a
    805 href="http://www.graphviz.org/doc/info/colors.html">colors</a>.) More
    806 complex node attributes can be provided with <tt>call
    807 DAG.setGraphAttrs(<i>node</i>, "<i>attributes</i>")</tt> (choices can be
    808 found at <a href="http://www.graphviz.org/doc/info/attrs.html">Graph
    809 Attributes</a>.)  If you want to restart and clear all the current graph
    810 attributes, then you can <tt>call DAG.clearGraphAttrs()</tt>. </p>
    811 
    812 <p>Note that graph visualization features are compiled out of Release builds
    813 to reduce file size.  This means that you need a Debug+Asserts or 
    814 Release+Asserts build to use these features.</p>
    815 
    816 </div>
    817 
    818 </div>
    819 
    820 <!-- *********************************************************************** -->
    821 <h2>
    822   <a name="datastructure">Picking the Right Data Structure for a Task</a>
    823 </h2>
    824 <!-- *********************************************************************** -->
    825 
    826 <div>
    827 
    828 <p>LLVM has a plethora of data structures in the <tt>llvm/ADT/</tt> directory,
    829  and we commonly use STL data structures.  This section describes the trade-offs
    830  you should consider when you pick one.</p>
    831 
    832 <p>
    833 The first step is a choose your own adventure: do you want a sequential
    834 container, a set-like container, or a map-like container?  The most important
    835 thing when choosing a container is the algorithmic properties of how you plan to
    836 access the container.  Based on that, you should use:</p>
    837 
    838 <ul>
    839 <li>a <a href="#ds_map">map-like</a> container if you need efficient look-up
    840     of a value based on another value.  Map-like containers also support
    840     of an value based on another value.  Map-like containers also support
    841     efficient queries for containment (whether a key is in the map).  Map-like
    842     containers generally do not support efficient reverse mapping (values to
    843     keys).  If you need that, use two maps.  Some map-like containers also
    844     support efficient iteration through the keys in sorted order.  Map-like
    845     containers are the most expensive sort; only use them if you need one of
    846     these capabilities.</li>
    847 
    848 <li>a <a href="#ds_set">set-like</a> container if you need to put a bunch of
    849     stuff into a container that automatically eliminates duplicates.  Some
    850     set-like containers support efficient iteration through the elements in
    851     sorted order.  Set-like containers are more expensive than sequential
    852     containers.
    853 </li>
    854 
    855 <li>a <a href="#ds_sequential">sequential</a> container provides
    856     the most efficient way to add elements and keeps track of the order they are
    857     added to the collection.  They permit duplicates and support efficient
    858     iteration, but do not support efficient look-up based on a key.
    859 </li>
    860 
    861 <li>a <a href="#ds_string">string</a> container is a specialized sequential
    862     container or reference structure that is used for character or byte
    863     arrays.</li>
    864 
    865 <li>a <a href="#ds_bit">bit</a> container provides an efficient way to store and
    866     perform set operations on sets of numeric id's, while automatically
    867     eliminating duplicates.  Bit containers require a maximum of 1 bit for each
    868     identifier you want to store.
    869 </li>
    870 </ul>
    871 
    872 <p>
    873 Once the proper category of container is determined, you can fine tune the
    874 memory use, constant factors, and cache behaviors of access by intelligently
    875 picking a member of the category.  Note that constant factors and cache behavior
    876 can be a big deal.  If you have a vector that usually only contains a few
    877 elements (but could contain many), for example, it's much better to use
    878 <a href="#dss_smallvector">SmallVector</a> than <a href="#dss_vector">vector</a>.
    879 Doing so avoids (relatively) expensive malloc/free calls, which dwarf the
    880 cost of adding the elements to the container. </p>
    881 
    882 </div>
    883   
    884   
    885 <!-- ======================================================================= -->
    886 <h3>
    887   <a name="ds_sequential">Sequential Containers (std::vector, std::list, etc)</a>
    888 </h3>
    889 
    890 <div>
    891 There are a variety of sequential containers available for you, based on your
    892 needs.  Pick the first in this section that will do what you want.
    893   
    894 <!-- _______________________________________________________________________ -->
    895 <h4>
    896   <a name="dss_arrayref">llvm/ADT/ArrayRef.h</a>
    897 </h4>
    898 
    899 <div>
    900 <p>The llvm::ArrayRef class is the preferred class to use in an interface that
    901    accepts a sequential list of elements in memory and just reads from them.  By
    902    taking an ArrayRef, the API can be passed a fixed size array, an std::vector,
    903    an llvm::SmallVector and anything else that is contiguous in memory.
    904 </p>
    905 </div>
    906 
    907 
    908   
    909 <!-- _______________________________________________________________________ -->
    910 <h4>
    911   <a name="dss_fixedarrays">Fixed Size Arrays</a>
    912 </h4>
    913 
    914 <div>
    915 <p>Fixed size arrays are very simple and very fast.  They are good if you know
    916 exactly how many elements you have, or you have a (low) upper bound on how many
    917 you have.</p>
    918 </div>
    919 
    920 <!-- _______________________________________________________________________ -->
    921 <h4>
    922   <a name="dss_heaparrays">Heap Allocated Arrays</a>
    923 </h4>
    924 
    925 <div>
    926 <p>Heap allocated arrays (new[] + delete[]) are also simple.  They are good if
    927 the number of elements is variable, if you know how many elements you will need
    928 before the array is allocated, and if the array is usually large (if not,
    929 consider a <a href="#dss_smallvector">SmallVector</a>).  The cost of a heap
    930 allocated array is the cost of the new/delete (aka malloc/free).  Also note that
    931 if you are allocating an array of a type with a constructor, the constructors and
    932 destructors will be run for every element in the array (re-sizable vectors only
    933 construct those elements actually used).</p>
    934 </div>
    935 
    936 <!-- _______________________________________________________________________ -->
    937 <h4>
    938   <a name="dss_tinyptrvector">"llvm/ADT/TinyPtrVector.h"</a>
    939 </h4>
    940 
    941 
    942 <div>
    943 <p><tt>TinyPtrVector&lt;Type&gt;</tt> is a highly specialized collection class
    944 that is optimized to avoid allocation in the case when a vector has zero or one
    945 elements.  It has two major restrictions: 1) it can only hold values of pointer
    946 type, and 2) it cannot hold a null pointer.</p>
    947   
    948 <p>Since this container is highly specialized, it is rarely used.</p>
    949   
    950 </div>
    951     
    952 <!-- _______________________________________________________________________ -->
    953 <h4>
    954   <a name="dss_smallvector">"llvm/ADT/SmallVector.h"</a>
    955 </h4>
    956 
    957 <div>
    958 <p><tt>SmallVector&lt;Type, N&gt;</tt> is a simple class that looks and smells
    959 just like <tt>vector&lt;Type&gt;</tt>:
    960 it supports efficient iteration, lays out elements in memory order (so you can
    961 do pointer arithmetic between elements), supports efficient push_back/pop_back
    962 operations, supports efficient random access to its elements, etc.</p>
    963 
    964 <p>The advantage of SmallVector is that it allocates space for
    965 some number of elements (N) <b>in the object itself</b>.  Because of this, if
    966 the SmallVector is dynamically smaller than N, no malloc is performed.  This can
    967 be a big win in cases where the malloc/free call is far more expensive than the
    968 code that fiddles around with the elements.</p>
    969 
    970 <p>This is good for vectors that are "usually small" (e.g. the number of
    971 predecessors/successors of a block is usually less than 8).  On the other hand,
    972 this makes the size of the SmallVector itself large, so you don't want to
    973 allocate lots of them (doing so will waste a lot of space).  As such,
    974 SmallVectors are most useful when on the stack.</p>
    975 
    976 <p>SmallVector also provides a nice portable and efficient replacement for
    977 <tt>alloca</tt>.</p>
    978 
    979 </div>
    980 
    981 <!-- _______________________________________________________________________ -->
    982 <h4>
    983   <a name="dss_vector">&lt;vector&gt;</a>
    984 </h4>
    985 
    986 <div>
    987 <p>
    988 std::vector is well loved and respected.  It is useful when SmallVector isn't:
    989 when the size of the vector is often large (thus the small optimization will
    990 rarely be a benefit) or if you will be allocating many instances of the vector
    991 itself (which would waste space for elements that aren't in the container).
    992 vector is also useful when interfacing with code that expects vectors :).
    993 </p>
    994 
    995 <p>One worthwhile note about std::vector: avoid code like this:</p>
    996 
    997 <div class="doc_code">
    998 <pre>
    999 for ( ... ) {
   1000    std::vector&lt;foo&gt; V;
   1001    use V;
   1002 }
   1003 </pre>
   1004 </div>
   1005 
   1006 <p>Instead, write this as:</p>
   1007 
   1008 <div class="doc_code">
   1009 <pre>
   1010 std::vector&lt;foo&gt; V;
   1011 for ( ... ) {
   1012    use V;
   1013    V.clear();
   1014 }
   1015 </pre>
   1016 </div>
   1017 
   1018 <p>Doing so will save (at least) one heap allocation and free per iteration of
   1019 the loop.</p>
   1020 
   1021 </div>
   1022 
   1023 <!-- _______________________________________________________________________ -->
   1024 <h4>
   1025   <a name="dss_deque">&lt;deque&gt;</a>
   1026 </h4>
   1027 
   1028 <div>
   1029 <p>std::deque is, in some senses, a generalized version of std::vector.  Like
   1030 std::vector, it provides constant time random access and other similar
   1031 properties, but it also provides efficient access to the front of the list.  It
   1032 does not guarantee continuity of elements within memory.</p>
   1033 
   1034 <p>In exchange for this extra flexibility, std::deque has significantly higher
   1035 constant factor costs than std::vector.  If possible, use std::vector or
   1036 something cheaper.</p>
   1037 </div>
   1038 
   1039 <!-- _______________________________________________________________________ -->
   1040 <h4>
   1041   <a name="dss_list">&lt;list&gt;</a>
   1042 </h4>
   1043 
   1044 <div>
   1045 <p>std::list is an extremely inefficient class that is rarely useful.
   1046 It performs a heap allocation for every element inserted into it, thus having an
   1047 extremely high constant factor, particularly for small data types.  std::list
   1048 also only supports bidirectional iteration, not random access iteration.</p>
   1049 
   1050 <p>In exchange for this high cost, std::list supports efficient access to both
   1051 ends of the list (like std::deque, but unlike std::vector or SmallVector).  In
   1052 addition, the iterator invalidation characteristics of std::list are stronger
   1053 than that of a vector class: inserting or removing an element into the list does
   1054 not invalidate iterators or pointers to other elements in the list.</p>
   1055 </div>
   1056 
   1057 <!-- _______________________________________________________________________ -->
   1058 <h4>
   1059   <a name="dss_ilist">llvm/ADT/ilist.h</a>
   1060 </h4>
   1061 
   1062 <div>
   1063 <p><tt>ilist&lt;T&gt;</tt> implements an 'intrusive' doubly-linked list.  It is
   1064 intrusive, because it requires the element to store and provide access to the
   1065 prev/next pointers for the list.</p>
   1066 
   1067 <p><tt>ilist</tt> has the same drawbacks as <tt>std::list</tt>, and additionally
   1068 requires an <tt>ilist_traits</tt> implementation for the element type, but it
   1069 provides some novel characteristics.  In particular, it can efficiently store
   1070 polymorphic objects, the traits class is informed when an element is inserted or
   1071 removed from the list, and <tt>ilist</tt>s are guaranteed to support a
   1072 constant-time splice operation.</p>
   1073 
   1074 <p>These properties are exactly what we want for things like
   1075 <tt>Instruction</tt>s and basic blocks, which is why these are implemented with
   1076 <tt>ilist</tt>s.</p>
   1077 
   1078 Related classes of interest are explained in the following subsections:
   1079     <ul>
   1080       <li><a href="#dss_ilist_traits">ilist_traits</a></li>
   1081       <li><a href="#dss_iplist">iplist</a></li>
   1082       <li><a href="#dss_ilist_node">llvm/ADT/ilist_node.h</a></li>
   1083       <li><a href="#dss_ilist_sentinel">Sentinels</a></li>
   1084     </ul>
   1085 </div>
   1086 
   1087 <!-- _______________________________________________________________________ -->
   1088 <h4>
   1089   <a name="dss_packedvector">llvm/ADT/PackedVector.h</a>
   1090 </h4>
   1091 
   1092 <div>
   1093 <p>
   1094 Useful for storing a vector of values using only a small number of bits for each
   1095 value. Apart from the standard operations of a vector-like container, it can
   1096 also perform an 'or' set operation. 
   1097 </p>
   1098 
   1099 <p>For example:</p>
   1100 
   1101 <div class="doc_code">
   1102 <pre>
   1103 enum State {
   1104     None = 0x0,
   1105     FirstCondition = 0x1,
   1106     SecondCondition = 0x2,
   1107     Both = 0x3
   1108 };
   1109 
   1110 State get() {
   1111     PackedVector&lt;State, 2&gt; Vec1;
   1112     Vec1.push_back(FirstCondition);
   1113 
   1114     PackedVector&lt;State, 2&gt; Vec2;
   1115     Vec2.push_back(SecondCondition);
   1116 
   1117     Vec1 |= Vec2;
   1118     return Vec1[0]; // returns 'Both'.
   1119 }
   1120 </pre>
   1121 </div>
   1122 
   1123 </div>
   1124 
   1125 <!-- _______________________________________________________________________ -->
   1126 <h4>
   1127   <a name="dss_ilist_traits">ilist_traits</a>
   1128 </h4>
   1129 
   1130 <div>
   1131 <p><tt>ilist_traits&lt;T&gt;</tt> is <tt>ilist&lt;T&gt;</tt>'s customization
   1132 mechanism. <tt>iplist&lt;T&gt;</tt> (and consequently <tt>ilist&lt;T&gt;</tt>)
   1133 publicly derive from this traits class.</p>
   1134 </div>
   1135 
   1136 <!-- _______________________________________________________________________ -->
   1137 <h4>
   1138   <a name="dss_iplist">iplist</a>
   1139 </h4>
   1140 
   1141 <div>
   1142 <p><tt>iplist&lt;T&gt;</tt> is <tt>ilist&lt;T&gt;</tt>'s base and as such
   1143 supports a slightly narrower interface. Notably, inserters from
   1144 <tt>T&amp;</tt> are absent.</p>
   1145 
   1146 <p><tt>ilist_traits&lt;T&gt;</tt> is a public base of this class and can be
   1147 used for a wide variety of customizations.</p>
   1148 </div>
   1149 
   1150 <!-- _______________________________________________________________________ -->
   1151 <h4>
   1152   <a name="dss_ilist_node">llvm/ADT/ilist_node.h</a>
   1153 </h4>
   1154 
   1155 <div>
   1156 <p><tt>ilist_node&lt;T&gt;</tt> implements the forward and backward links
   1157 that are expected by the <tt>ilist&lt;T&gt;</tt> (and analogous containers)
   1158 in the default manner.</p>
   1159 
   1160 <p><tt>ilist_node&lt;T&gt;</tt>s are meant to be embedded in the node type
   1161 <tt>T</tt>, usually <tt>T</tt> publicly derives from
   1162 <tt>ilist_node&lt;T&gt;</tt>.</p>
   1163 </div>
   1164 
   1165 <!-- _______________________________________________________________________ -->
   1166 <h4>
   1167   <a name="dss_ilist_sentinel">Sentinels</a>
   1168 </h4>
   1169 
   1170 <div>
   1171 <p><tt>ilist</tt>s have another specialty that must be considered. To be a good
   1172 citizen in the C++ ecosystem, it needs to support the standard container
   1173 operations, such as <tt>begin</tt> and <tt>end</tt> iterators, etc. Also, the
   1174 <tt>operator--</tt> must work correctly on the <tt>end</tt> iterator in the
   1175 case of non-empty <tt>ilist</tt>s.</p>
   1176 
   1177 <p>The only sensible solution to this problem is to allocate a so-called
   1178 <i>sentinel</i> along with the intrusive list, which serves as the <tt>end</tt>
   1179 iterator, providing the back-link to the last element. However conforming to the
   1180 C++ convention it is illegal to <tt>operator++</tt> beyond the sentinel and it
   1181 also must not be dereferenced.</p>
   1182 
   1183 <p>These constraints allow for some implementation freedom to the <tt>ilist</tt>
   1184 how to allocate and store the sentinel. The corresponding policy is dictated
   1185 by <tt>ilist_traits&lt;T&gt;</tt>. By default a <tt>T</tt> gets heap-allocated
   1186 whenever the need for a sentinel arises.</p>
   1187 
   1188 <p>While the default policy is sufficient in most cases, it may break down when
   1189 <tt>T</tt> does not provide a default constructor. Also, in the case of many
   1190 instances of <tt>ilist</tt>s, the memory overhead of the associated sentinels
   1191 is wasted. To alleviate the situation with numerous and voluminous
   1192 <tt>T</tt>-sentinels, sometimes a trick is employed, leading to <i>ghostly
   1193 sentinels</i>.</p>
   1194 
   1195 <p>Ghostly sentinels are obtained by specially-crafted <tt>ilist_traits&lt;T&gt;</tt>
   1196 which superpose the sentinel with the <tt>ilist</tt> instance in memory. Pointer
   1197 arithmetic is used to obtain the sentinel, which is relative to the
   1198 <tt>ilist</tt>'s <tt>this</tt> pointer. The <tt>ilist</tt> is augmented by an
   1199 extra pointer, which serves as the back-link of the sentinel. This is the only
   1200 field in the ghostly sentinel which can be legally accessed.</p>
   1201 </div>
   1202 
   1203 <!-- _______________________________________________________________________ -->
   1204 <h4>
   1205   <a name="dss_other">Other Sequential Container options</a>
   1206 </h4>
   1207 
   1208 <div>
   1209 <p>Other STL containers are available, such as std::string.</p>
   1210 
   1211 <p>There are also various STL adapter classes such as std::queue,
   1212 std::priority_queue, std::stack, etc.  These provide simplified access to an
   1213 underlying container but don't affect the cost of the container itself.</p>
   1214 
   1215 </div>
   1216 </div>
   1217 
   1218 <!-- ======================================================================= -->
   1219 <h3>
   1220   <a name="ds_string">String-like containers</a>
   1221 </h3>
   1222 
   1223 <div>
   1224 
   1225 <p>
   1226 There are a variety of ways to pass around and use strings in C and C++, and
   1227 LLVM adds a few new options to choose from.  Pick the first option on this list
   1228 that will do what you need; they are ordered according to their relative cost.
   1229 </p>
   1230 <p>
   1231 Note that it is generally preferred to <em>not</em> pass strings around as 
   1232 "<tt>const char*</tt>"'s.  These have a number of problems, including the fact
   1233 that they cannot represent embedded nul ("\0") characters, and do not have a
   1234 length available efficiently.  The general replacement for '<tt>const 
   1235 char*</tt>' is StringRef.
   1236 </p>
   1237   
   1238 <p>For more information on choosing string containers for APIs, please see
   1239 <a href="#string_apis">Passing strings</a>.</p>
   1240   
   1241   
   1242 <!-- _______________________________________________________________________ -->
   1243 <h4>
   1244   <a name="dss_stringref">llvm/ADT/StringRef.h</a>
   1245 </h4>
   1246 
   1247 <div>
   1248 <p>
   1249 The StringRef class is a simple value class that contains a pointer to a
   1250 character and a length, and is quite related to the <a 
   1251 href="#dss_arrayref">ArrayRef</a> class (but specialized for arrays of
   1252 characters).  Because StringRef carries a length with it, it safely handles
   1253 strings with embedded nul characters in it, getting the length does not require
   1254 a strlen call, and it even has very convenient APIs for slicing and dicing the
   1255 character range that it represents.
   1256 </p>
   1257   
   1258 <p>
   1259 StringRef is ideal for passing simple strings around that are known to be live,
   1260 either because they are C string literals, std::string, a C array, or a
   1261 SmallVector.  Each of these cases has an efficient implicit conversion to
   1262 StringRef, which doesn't result in a dynamic strlen being executed.
   1263 </p>
   1264   
   1265 <p>StringRef has a few major limitations which make more powerful string
   1266 containers useful:</p>
   1267   
   1268 <ol>
   1269 <li>You cannot directly convert a StringRef to a 'const char*' because there is
   1270 no way to add a trailing nul (unlike the .c_str() method on various stronger
   1271 classes).</li>
   1272 
   1273   
   1274 <li>StringRef doesn't own or keep alive the underlying string bytes.
   1275 As such it can easily lead to dangling pointers, and is not suitable for
   1276 embedding in datastructures in most cases (instead, use an std::string or
   1277 something like that).</li>
   1278   
   1279 <li>For the same reason, StringRef cannot be used as the return value of a
   1280 method if the method "computes" the result string.  Instead, use
   1281 std::string.</li>
   1282     
   1283 <li>StringRef's do not allow you to mutate the pointed-to string bytes and it
   1284 doesn't allow you to insert or remove bytes from the range.  For editing 
   1285 operations like this, it interoperates with the <a 
   1286 href="#dss_twine">Twine</a> class.</li>
   1287 </ol>
   1288   
   1289 <p>Because of its strengths and limitations, it is very common for a function to
   1290 take a StringRef and for a method on an object to return a StringRef that
   1291 points into some string that it owns.</p>
   1292   
   1293 </div>
   1294   
   1295 <!-- _______________________________________________________________________ -->
   1296 <h4>
   1297   <a name="dss_twine">llvm/ADT/Twine.h</a>
   1298 </h4>
   1299 
   1300 <div>
   1301   <p>
   1302   The Twine class is used as an intermediary datatype for APIs that want to take
   1303   a string that can be constructed inline with a series of concatenations.
   1304   Twine works by forming recursive instances of the Twine datatype (a simple
   1305   value object) on the stack as temporary objects, linking them together into a
   1306   tree which is then linearized when the Twine is consumed.  Twine is only safe
   1307   to use as the argument to a function, and should always be a const reference,
   1308   e.g.:
   1309   </p>
   1310   
   1311   <pre>
   1312     void foo(const Twine &amp;T);
   1313     ...
   1314     StringRef X = ...
   1315     unsigned i = ...
   1316     foo(X + "." + Twine(i));
   1317   </pre>
   1318   
   1319   <p>This example forms a string like "blarg.42" by concatenating the values
   1320   together, and does not form intermediate strings containing "blarg" or
   1321   "blarg.".
   1322   </p>
   1323   
   1324   <p>Because Twine is constructed with temporary objects on the stack, and
   1325   because these instances are destroyed at the end of the current statement,
   1326   it is an inherently dangerous API.  For example, this simple variant contains
   1327   undefined behavior and will probably crash:</p>
   1328   
   1329   <pre>
   1330     void foo(const Twine &amp;T);
   1331     ...
   1332     StringRef X = ...
   1333     unsigned i = ...
   1334     const Twine &amp;Tmp = X + "." + Twine(i);
   1335     foo(Tmp);
   1336   </pre>
   1337 
   1338   <p>... because the temporaries are destroyed before the call.  That said,
   1339   Twine's are much more efficient than intermediate std::string temporaries, and
   1340   they work really well with StringRef.  Just be aware of their limitations.</p>
   1341   
   1342 </div>
   1343 
   1344   
   1345 <!-- _______________________________________________________________________ -->
   1346 <h4>
   1347   <a name="dss_smallstring">llvm/ADT/SmallString.h</a>
   1348 </h4>
   1349 
   1350 <div>
   1351   
   1352 <p>SmallString is a subclass of <a href="#dss_smallvector">SmallVector</a> that
   1353 adds some convenience APIs like += that takes StringRef's.  SmallString avoids
   1354 allocating memory in the case when the preallocated space is enough to hold its
   1355 data, and it falls back to general heap allocation when required.  Since it owns
   1356 its data, it is very safe to use and supports full mutation of the string.</p>
   1357   
   1358 <p>As with SmallVector, the big downside to SmallString is its sizeof.  While
   1359 SmallStrings are optimized for small strings, they themselves are not particularly
   1360 small.  This means that they work great for temporary scratch buffers on the
   1361 stack, but should not generally be put into the heap: it is very rare to 
   1362 see a SmallString as the member of a frequently-allocated heap data structure
   1363 or returned by-value.
   1364 </p>
   1365 
   1366 </div>
   1367   
   1368 <!-- _______________________________________________________________________ -->
   1369 <h4>
   1370   <a name="dss_stdstring">std::string</a>
   1371 </h4>
   1372 
   1373 <div>
   1374   
   1375   <p>The standard C++ std::string class is a very general class that (like
   1376   SmallString) owns its underlying data.  sizeof(std::string) is very reasonable
   1377   so it can be embedded into heap data structures and returned by-value.
   1378   On the other hand, std::string is highly inefficient for inline editing (e.g.
   1379   concatenating a bunch of stuff together) and because it is provided by the
   1380   standard library, its performance characteristics depend a lot on the host
   1381   standard library (e.g. libc++ and MSVC provide a highly optimized string
   1382   class, GCC contains a really slow implementation).
   1383   </p>
   1384 
   1385   <p>The major disadvantage of std::string is that almost every operation that
   1386   makes them larger can allocate memory, which is slow.  As such, it is better
   1387   to use SmallVector or Twine as a scratch buffer, but then use std::string to
   1388   persist the result.</p>
   1389 
   1390   
   1391 </div>
   1392   
   1393 <!-- end of strings -->
   1394 </div>
   1395 
   1396   
   1397 <!-- ======================================================================= -->
   1398 <h3>
   1399   <a name="ds_set">Set-Like Containers (std::set, SmallSet, SetVector, etc)</a>
   1400 </h3>
   1401 
   1402 <div>
   1403 
   1404 <p>Set-like containers are useful when you need to canonicalize multiple values
   1405 into a single representation.  There are several different choices for how to do
   1406 this, providing various trade-offs.</p>
   1407 
   1408 <!-- _______________________________________________________________________ -->
   1409 <h4>
   1410   <a name="dss_sortedvectorset">A sorted 'vector'</a>
   1411 </h4>
   1412 
   1413 <div>
   1414 
   1415 <p>If you intend to insert a lot of elements, then do a lot of queries, a
   1416 great approach is to use a vector (or other sequential container) with
   1417 std::sort+std::unique to remove duplicates.  This approach works really well if
   1418 your usage pattern has these two distinct phases (insert then query), and can be
   1419 coupled with a good choice of <a href="#ds_sequential">sequential container</a>.
   1420 </p>
   1421 
   1422 <p>
   1423 This combination provides several nice properties: the result data is
   1424 contiguous in memory (good for cache locality), has few allocations, is easy to
   1425 address (iterators in the final vector are just indices or pointers), and can be
   1426 efficiently queried with a standard binary or radix search.</p>
   1427 
   1428 </div>
   1429 
   1430 <!-- _______________________________________________________________________ -->
   1431 <h4>
   1432   <a name="dss_smallset">"llvm/ADT/SmallSet.h"</a>
   1433 </h4>
   1434 
   1435 <div>
   1436 
   1437 <p>If you have a set-like data structure that is usually small and whose elements
   1438 are reasonably small, a <tt>SmallSet&lt;Type, N&gt;</tt> is a good choice.  This set
   1439 has space for N elements in place (thus, if the set is dynamically smaller than
   1440 N, no malloc traffic is required) and accesses them with a simple linear search.
   1441 When the set grows beyond 'N' elements, it allocates a more expensive representation that
   1442 guarantees efficient access (for most types, it falls back to std::set, but for
   1443 pointers it uses something far better, <a
   1444 href="#dss_smallptrset">SmallPtrSet</a>).</p>
   1445 
   1446 <p>The magic of this class is that it handles small sets extremely efficiently,
   1447 but gracefully handles extremely large sets without loss of efficiency.  The
   1448 drawback is that the interface is quite small: it supports insertion, queries
   1449 and erasing, but does not support iteration.</p>
   1450 
   1451 </div>
   1452 
   1453 <!-- _______________________________________________________________________ -->
   1454 <h4>
   1455   <a name="dss_smallptrset">"llvm/ADT/SmallPtrSet.h"</a>
   1456 </h4>
   1457 
   1458 <div>
   1459 
   1460 <p>SmallPtrSet has all the advantages of <tt>SmallSet</tt> (and a <tt>SmallSet</tt> of pointers is 
   1461 transparently implemented with a <tt>SmallPtrSet</tt>), but also supports iterators.  If
   1462 more than 'N' insertions are performed, a single quadratically
   1463 probed hash table is allocated and grows as needed, providing extremely
   1464 efficient access (constant time insertion/deleting/queries with low constant
   1465 factors) and is very stingy with malloc traffic.</p>
   1466 
   1467 <p>Note that, unlike <tt>std::set</tt>, the iterators of <tt>SmallPtrSet</tt> are invalidated
   1468 whenever an insertion occurs.  Also, the values visited by the iterators are not
   1469 visited in sorted order.</p>
   1470 
   1471 </div>
   1472 
   1473 <!-- _______________________________________________________________________ -->
   1474 <h4>
   1475   <a name="dss_denseset">"llvm/ADT/DenseSet.h"</a>
   1476 </h4>
   1477 
   1478 <div>
   1479 
   1480 <p>
   1481 DenseSet is a simple quadratically probed hash table.  It excels at supporting
   1482 small values: it uses a single allocation to hold all of the values that
   1483 are currently inserted in the set.  DenseSet is a great way to unique small
   1484 values that are not simple pointers (use <a 
   1485 href="#dss_smallptrset">SmallPtrSet</a> for pointers).  Note that DenseSet has
   1486 the same requirements for the value type that <a 
   1487 href="#dss_densemap">DenseMap</a> has.
   1488 </p>
   1489 
   1490 </div>
   1491 
   1492 <!-- _______________________________________________________________________ -->
   1493 <h4>
   1494   <a name="dss_FoldingSet">"llvm/ADT/FoldingSet.h"</a>
   1495 </h4>
   1496 
   1497 <div>
   1498 
   1499 <p>
   1500 FoldingSet is an aggregate class that is really good at uniquing
   1501 expensive-to-create or polymorphic objects.  It is a combination of a chained
   1502 hash table with intrusive links (uniqued objects are required to inherit from
   1503 FoldingSetNode) that uses <a href="#dss_smallvector">SmallVector</a> as part of
   1504 its ID process.</p>
   1505 
   1506 <p>Consider a case where you want to implement a "getOrCreateFoo" method for
   1507 a complex object (for example, a node in the code generator).  The client has a
   1508 description of *what* it wants to generate (it knows the opcode and all the
   1509 operands), but we don't want to 'new' a node, then try inserting it into a set
   1510 only to find out it already exists, at which point we would have to delete it
   1511 and return the node that already exists.
   1512 </p>
   1513 
   1514 <p>To support this style of client, FoldingSet performs a query with a
   1515 FoldingSetNodeID (which wraps SmallVector) that can be used to describe the
   1516 element that we want to query for.  The query either returns the element
   1517 matching the ID or it returns an opaque ID that indicates where insertion should
   1518 take place.  Construction of the ID usually does not require heap traffic.</p>
   1519 
   1520 <p>Because FoldingSet uses intrusive links, it can support polymorphic objects
   1521 in the set (for example, you can have SDNode instances mixed with LoadSDNodes).
   1522 Because the elements are individually allocated, pointers to the elements are
   1523 stable: inserting or removing elements does not invalidate any pointers to other
   1524 elements.
   1525 </p>
   1526 
   1527 </div>
   1528 
   1529 <!-- _______________________________________________________________________ -->
   1530 <h4>
   1531   <a name="dss_set">&lt;set&gt;</a>
   1532 </h4>
   1533 
   1534 <div>
   1535 
   1536 <p><tt>std::set</tt> is a reasonable all-around set class, which is decent at
   1537 many things but great at nothing.  std::set allocates memory for each element
   1538 inserted (thus it is very malloc intensive) and typically stores three pointers
   1539 per element in the set (thus adding a large amount of per-element space
   1540 overhead).  It offers guaranteed log(n) performance, which is not particularly
   1541 fast from a complexity standpoint (particularly if the elements of the set are
   1542 expensive to compare, like strings), and has extremely high constant factors for
   1543 lookup, insertion and removal.</p>
   1544 
   1545 <p>The advantages of std::set are that its iterators are stable (deleting or
   1546 inserting an element from the set does not affect iterators or pointers to other
   1547 elements) and that iteration over the set is guaranteed to be in sorted order.
   1548 If the elements in the set are large, then the relative overhead of the pointers
   1549 and malloc traffic is not a big deal, but if the elements of the set are small,
   1550 std::set is almost never a good choice.</p>
   1551 
   1552 </div>
   1553 
   1554 <!-- _______________________________________________________________________ -->
   1555 <h4>
   1556   <a name="dss_setvector">"llvm/ADT/SetVector.h"</a>
   1557 </h4>
   1558 
   1559 <div>
   1560 <p>LLVM's SetVector&lt;Type&gt; is an adapter class that combines your choice of
   1561 a set-like container along with a <a href="#ds_sequential">Sequential 
   1562 Container</a>.  The important property
   1563 that this provides is efficient insertion with uniquing (duplicate elements are
   1564 ignored) with iteration support.  It implements this by inserting elements into
   1565 both a set-like container and the sequential container, using the set-like
   1566 container for uniquing and the sequential container for iteration.
   1567 </p>
   1568 
   1569 <p>The difference between SetVector and other sets is that the order of
   1570 iteration is guaranteed to match the order of insertion into the SetVector.
   1571 This property is really important for things like sets of pointers.  Because
   1572 pointer values are non-deterministic (e.g. vary across runs of the program on
   1573 different machines), iterating over the pointers in the set will
   1574 not be in a well-defined order.</p>
   1575 
   1576 <p>
   1577 The drawback of SetVector is that it requires twice as much space as a normal
   1578 set and has the sum of constant factors from the set-like container and the 
   1579 sequential container that it uses.  Use it *only* if you need to iterate over 
   1580 the elements in a deterministic order.  SetVector is also expensive to delete
   1581 elements out of (linear time), unless you use its "pop_back" method, which is
   1582 faster.
   1583 </p>
   1584 
   1585 <p><tt>SetVector</tt> is an adapter class that defaults to
   1586    using <tt>std::vector</tt> and a size 16 <tt>SmallSet</tt> for the underlying
   1587    containers, so it is quite expensive. However,
   1588    <tt>"llvm/ADT/SetVector.h"</tt> also provides a <tt>SmallSetVector</tt>
   1589    class, which defaults to using a <tt>SmallVector</tt> and <tt>SmallSet</tt>
   1590    of a specified size. If you use this, and if your sets are dynamically
   1591    smaller than <tt>N</tt>, you will save a lot of heap traffic.</p>
   1592 
   1593 </div>
   1594 
   1595 <!-- _______________________________________________________________________ -->
   1596 <h4>
   1597   <a name="dss_uniquevector">"llvm/ADT/UniqueVector.h"</a>
   1598 </h4>
   1599 
   1600 <div>
   1601 
   1602 <p>
   1603 UniqueVector is similar to <a href="#dss_setvector">SetVector</a>, but it
   1604 retains a unique ID for each element inserted into the set.  It internally
   1605 contains a map and a vector, and it assigns a unique ID for each value inserted
   1606 into the set.</p>
   1607 
   1608 <p>UniqueVector is very expensive: its cost is the sum of the cost of
   1609 maintaining both the map and vector, it has high complexity, high constant
   1610 factors, and produces a lot of malloc traffic.  It should be avoided.</p>
   1611 
   1612 </div>
   1613 
   1614 
   1615 <!-- _______________________________________________________________________ -->
   1616 <h4>
   1617   <a name="dss_otherset">Other Set-Like Container Options</a>
   1618 </h4>
   1619 
   1620 <div>
   1621 
   1622 <p>
   1623 The STL provides several other options, such as std::multiset and the various 
   1624 "hash_set" like containers (whether from C++ TR1 or from the SGI library). We
   1625 never use hash_set and unordered_set because they are generally very expensive 
   1626 (each insertion requires a malloc) and very non-portable.
   1627 </p>
   1628 
   1629 <p>std::multiset is useful if you're not interested in elimination of
   1630 duplicates, but has all the drawbacks of std::set.  A sorted vector (where you 
   1631 don't delete duplicate entries) or some other approach is almost always
   1632 better.</p>
   1633 
   1634 </div>
   1635 
   1636 </div>
   1637 
   1638 <!-- ======================================================================= -->
   1639 <h3>
   1640   <a name="ds_map">Map-Like Containers (std::map, DenseMap, etc)</a>
   1641 </h3>
   1642 
   1643 <div>
   1644 <p>Map-like containers are useful when you want to associate data with a key.  As
   1645 usual, there are a lot of different ways to do this. :)</p>
   1646 
   1647 <!-- _______________________________________________________________________ -->
   1648 <h4>
   1649   <a name="dss_sortedvectormap">A sorted 'vector'</a>
   1650 </h4>
   1651 
   1652 <div>
   1653 
   1654 <p>
   1655 If your usage pattern follows a strict insert-then-query approach, you can
   1656 trivially use the same approach as <a href="#dss_sortedvectorset">sorted vectors
   1657 for set-like containers</a>.  The only difference is that your query function
   1658 (which uses std::lower_bound to get efficient log(n) lookup) should only compare
   1659 the key, not both the key and value.  This yields the same advantages as sorted
   1660 vectors for sets.
   1661 </p>
   1662 </div>
   1663 
   1664 <!-- _______________________________________________________________________ -->
   1665 <h4>
   1666   <a name="dss_stringmap">"llvm/ADT/StringMap.h"</a>
   1667 </h4>
   1668 
   1669 <div>
   1670 
   1671 <p>
   1672 Strings are commonly used as keys in maps, and they are difficult to support
   1673 efficiently: they are variable length, inefficient to hash and compare when
   1674 long, expensive to copy, etc.  StringMap is a specialized container designed to
   1675 cope with these issues.  It supports mapping an arbitrary range of bytes to an
   1676 arbitrary other object.</p>
   1677 
   1678 <p>The StringMap implementation uses a quadratically-probed hash table, where
   1679 the buckets store a pointer to the heap allocated entries (and some other
   1680 stuff).  The entries in the map must be heap allocated because the strings are
   1681 variable length.  The string data (key) and the element object (value) are
   1682 stored in the same allocation with the string data immediately after the element
   1683 object.  This container guarantees the "<tt>(char*)(&amp;Value+1)</tt>" points
   1684 to the key string for a value.</p>
   1685 
   1686 <p>The StringMap is very fast for several reasons: quadratic probing is very
   1687 cache efficient for lookups, the hash value of strings in buckets is not
   1688 recomputed when looking up an element, StringMap rarely has to touch the
   1689 memory for unrelated objects when looking up a value (even when hash collisions
   1690 happen), hash table growth does not recompute the hash values for strings
   1691 already in the table, and each pair in the map is stored in a single allocation
   1692 (the string data is stored in the same allocation as the Value of a pair).</p>
   1693 
   1694 <p>StringMap also provides query methods that take byte ranges, so it only ever
   1695 copies a string if a value is inserted into the table.</p>
   1696 </div>
   1697 
   1698 <!-- _______________________________________________________________________ -->
   1699 <h4>
   1700   <a name="dss_indexedmap">"llvm/ADT/IndexedMap.h"</a>
   1701 </h4>
   1702 
   1703 <div>
   1704 <p>
   1705 IndexedMap is a specialized container for mapping small dense integers (or
   1706 values that can be mapped to small dense integers) to some other type.  It is
   1707 internally implemented as a vector with a mapping function that maps the keys to
   1708 the dense integer range.
   1709 </p>
   1710 
   1711 <p>
   1712 This is useful for cases like virtual registers in the LLVM code generator: they
   1713 have a dense mapping that is offset by a compile-time constant (the first
   1714 virtual register ID).</p>
   1715 
   1716 </div>
   1717 
   1718 <!-- _______________________________________________________________________ -->
   1719 <h4>
   1720   <a name="dss_densemap">"llvm/ADT/DenseMap.h"</a>
   1721 </h4>
   1722 
   1723 <div>
   1724 
   1725 <p>
   1726 DenseMap is a simple quadratically probed hash table.  It excels at supporting
   1727 small keys and values: it uses a single allocation to hold all of the pairs that
   1728 are currently inserted in the map.  DenseMap is a great way to map pointers to
   1729 pointers, or map other small types to each other.
   1730 </p>
   1731 
   1732 <p>
   1733 There are several aspects of DenseMap that you should be aware of, however.  The
   1734 iterators in a DenseMap are invalidated whenever an insertion occurs, unlike
   1735 std::map.  Also, because DenseMap allocates space for a large number of key/value
   1736 pairs (it starts with 64 by default), it will waste a lot of space if your keys
   1737 or values are large.  Finally, you must implement a partial specialization of
   1738 DenseMapInfo for the key that you want, if it isn't already supported.  This
   1739 is required to tell DenseMap about two special marker values (which can never be
   1740 inserted into the map) that it needs internally.</p>
   1741 
   1742 </div>
   1743 
   1744 <!-- _______________________________________________________________________ -->
   1745 <h4>
   1746   <a name="dss_valuemap">"llvm/ADT/ValueMap.h"</a>
   1747 </h4>
   1748 
   1749 <div>
   1750 
   1751 <p>
   1752 ValueMap is a wrapper around a <a href="#dss_densemap">DenseMap</a> mapping
   1753 Value*s (or subclasses) to another type.  When a Value is deleted or RAUW'ed,
   1754 ValueMap will update itself so the new version of the key is mapped to the same
   1755 value, just as if the key were a WeakVH.  You can configure exactly how this
   1756 happens, and what else happens on these two events, by passing
   1757 a <code>Config</code> parameter to the ValueMap template.</p>
   1758 
   1759 </div>
   1760 
   1761 <!-- _______________________________________________________________________ -->
   1762 <h4>
   1763   <a name="dss_intervalmap">"llvm/ADT/IntervalMap.h"</a>
   1764 </h4>
   1765 
   1766 <div>
   1767 
   1768 <p> IntervalMap is a compact map for small keys and values. It maps key
   1769 intervals instead of single keys, and it will automatically coalesce adjacent
   1770 intervals. When the map only contains a few intervals, they are stored in the
   1771 map object itself to avoid allocations.</p>
   1772 
   1773 <p> The IntervalMap iterators are quite big, so they should not be passed around
   1774 as STL iterators. The heavyweight iterators allow a smaller data structure.</p>
   1775 
   1776 </div>
   1777 
   1778 <!-- _______________________________________________________________________ -->
   1779 <h4>
   1780   <a name="dss_map">&lt;map&gt;</a>
   1781 </h4>
   1782 
   1783 <div>
   1784 
   1785 <p>
   1786 std::map has similar characteristics to <a href="#dss_set">std::set</a>: it uses
   1787 a single allocation per pair inserted into the map, it offers log(n) lookup with
   1788 an extremely large constant factor, imposes a space penalty of 3 pointers per
   1789 pair in the map, etc.</p>
   1790 
   1791 <p>std::map is most useful when your keys or values are very large, if you need
   1792 to iterate over the collection in sorted order, or if you need stable iterators
   1793 into the map (i.e. they don't get invalidated if an insertion or deletion of
   1794 another element takes place).</p>
   1795 
   1796 </div>
   1797 
   1798 <!-- _______________________________________________________________________ -->
   1799 <h4>
   1800   <a name="dss_inteqclasses">"llvm/ADT/IntEqClasses.h"</a>
   1801 </h4>
   1802 
   1803 <div>
   1804 
   1805 <p>IntEqClasses provides a compact representation of equivalence classes of
   1806 small integers. Initially, each integer in the range 0..n-1 has its own
   1807 equivalence class. Classes can be joined by passing two class representatives to
   1808 the join(a, b) method. Two integers are in the same class when findLeader()
   1809 returns the same representative.</p>
   1810 
   1811 <p>Once all equivalence classes are formed, the map can be compressed so each
   1812 integer 0..n-1 maps to an equivalence class number in the range 0..m-1, where m
   1813 is the total number of equivalence classes. The map must be uncompressed before
   1814 it can be edited again.</p>
   1815 
   1816 </div>
   1817 
   1818 <!-- _______________________________________________________________________ -->
   1819 <h4>
   1820   <a name="dss_othermap">Other Map-Like Container Options</a>
   1821 </h4>
   1822 
   1823 <div>
   1824 
   1825 <p>
   1826 The STL provides several other options, such as std::multimap and the various 
   1827 "hash_map" like containers (whether from C++ TR1 or from the SGI library). We
   1828 never use hash_map and unordered_map because they are generally very expensive 
   1829 (each insertion requires a malloc) and very non-portable.</p>
   1830 
   1831 <p>std::multimap is useful if you want to map a key to multiple values, but has
   1832 all the drawbacks of std::map.  A sorted vector or some other approach is almost
   1833 always better.</p>
   1834 
   1835 </div>
   1836 
   1837 </div>
   1838 
   1839 <!-- ======================================================================= -->
   1840 <h3>
   1841   <a name="ds_bit">Bit storage containers (BitVector, SparseBitVector)</a>
   1842 </h3>
   1843 
   1844 <div>
   1845 <p>Unlike the other containers, there are only three bit storage containers, and 
   1846 choosing when to use each is relatively straightforward.</p>
   1847 
   1848 <p>One additional option is 
   1849 <tt>std::vector&lt;bool&gt;</tt>: we discourage its use for two reasons: 1) the
   1850 implementation in many common compilers (e.g. commonly available versions of 
   1851 GCC) is extremely inefficient and 2) the C++ standards committee is likely to
   1852 deprecate this container and/or change it significantly somehow.  In any case,
   1853 please don't use it.</p>
   1854 
   1855 <!-- _______________________________________________________________________ -->
   1856 <h4>
   1857   <a name="dss_bitvector">BitVector</a>
   1858 </h4>
   1859 
   1860 <div>
   1861 <p> The BitVector container provides a dynamic size set of bits for manipulation.
   1862 It supports individual bit setting/testing, as well as set operations.  The set
   1863 operations take time O(size of bitvector), but operations are performed one word
   1864 at a time, instead of one bit at a time.  This makes the BitVector very fast for
   1865 set operations compared to other containers.  Use the BitVector when you expect
   1866 the number of set bits to be high (i.e. a dense set).
   1867 </p>
   1868 </div>
   1869 
   1870 <!-- _______________________________________________________________________ -->
   1871 <h4>
   1872   <a name="dss_smallbitvector">SmallBitVector</a>
   1873 </h4>
   1874 
   1875 <div>
   1876 <p> The SmallBitVector container provides the same interface as BitVector, but
   1877 it is optimized for the case where only a small number of bits, less than
   1878 25 or so, are needed. It also transparently supports larger bit counts, but
   1879 slightly less efficiently than a plain BitVector, so SmallBitVector should
   1880 only be used when larger counts are rare.
   1881 </p>
   1882 
   1883 <p>
   1884 At this time, SmallBitVector does not support set operations (and, or, xor),
   1885 and its operator[] does not provide an assignable lvalue.
   1886 </p>
   1887 </div>
   1888 
   1889 <!-- _______________________________________________________________________ -->
   1890 <h4>
   1891   <a name="dss_sparsebitvector">SparseBitVector</a>
   1892 </h4>
   1893 
   1894 <div>
   1895 <p> The SparseBitVector container is much like BitVector, with one major
   1896 difference: only the bits that are set are stored.  This makes the
   1897 SparseBitVector much more space efficient than BitVector when the set is sparse,
   1898 as well as making set operations O(number of set bits) instead of O(size of
   1899 universe).  The downside to the SparseBitVector is that setting and testing of random bits is O(N), and on large SparseBitVectors, this can be slower than BitVector. In our implementation, setting or testing bits in sorted order
   1900 (either forwards or reverse) is O(1) worst case.  Testing and setting bits within 128 bits (depends on size) of the current bit is also O(1).  As a general statement, testing/setting bits in a SparseBitVector is O(distance away from last set bit).
   1901 </p>
   1902 </div>
   1903 
   1904 </div>
   1905 
   1906 </div>
   1907 
   1908 <!-- *********************************************************************** -->
   1909 <h2>
   1910   <a name="common">Helpful Hints for Common Operations</a>
   1911 </h2>
   1912 <!-- *********************************************************************** -->
   1913 
   1914 <div>
   1915 
   1916 <p>This section describes how to perform some very simple transformations of
   1917 LLVM code.  This is meant to give examples of common idioms used, showing the
   1918 practical side of LLVM transformations.</p> <p>Because this is a "how-to" section,
   1919 you should also read about the main classes that you will be working with.  The
   1920 <a href="#coreclasses">Core LLVM Class Hierarchy Reference</a> contains details
   1921 and descriptions of the main classes that you should know about.</p>
   1922 
   1923 <!-- NOTE: this section should be heavy on example code -->
   1924 <!-- ======================================================================= -->
   1925 <h3>
   1926   <a name="inspection">Basic Inspection and Traversal Routines</a>
   1927 </h3>
   1928 
   1929 <div>
   1930 
   1931 <p>The LLVM compiler infrastructure has many different data structures that may
   1932 be traversed.  Following the example of the C++ standard template library, the
   1933 techniques used to traverse these various data structures are all basically the
   1934 same.  For an enumerable sequence of values, the <tt>XXXbegin()</tt> function (or
   1935 method) returns an iterator to the start of the sequence, the <tt>XXXend()</tt>
   1936 function returns an iterator pointing to one past the last valid element of the
   1937 sequence, and there is some <tt>XXXiterator</tt> data type that is common
   1938 between the two operations.</p>
   1939 
   1940 <p>Because the pattern for iteration is common across many different aspects of
   1941 the program representation, the standard template library algorithms may be used
   1942 on them, and it is easier to remember how to iterate. First we show a few common
   1943 examples of the data structures that need to be traversed.  Other data
   1944 structures are traversed in very similar ways.</p>
   1945 
   1946 <!-- _______________________________________________________________________ -->
   1947 <h4>
   1948   <a name="iterate_function">Iterating over the </a><a
   1949   href="#BasicBlock"><tt>BasicBlock</tt></a>s in a <a
   1950   href="#Function"><tt>Function</tt></a>
   1951 </h4>
   1952 
   1953 <div>
   1954 
   1955 <p>It's quite common to have a <tt>Function</tt> instance that you'd like to
   1956 transform in some way; in particular, you'd like to manipulate its
   1957 <tt>BasicBlock</tt>s.  To facilitate this, you'll need to iterate over all of
   1958 the <tt>BasicBlock</tt>s that constitute the <tt>Function</tt>. The following is
   1959 an example that prints the name of a <tt>BasicBlock</tt> and the number of
   1960 <tt>Instruction</tt>s it contains:</p>
   1961 
   1962 <div class="doc_code">
   1963 <pre>
   1964 // <i>func is a pointer to a Function instance</i>
   1965 for (Function::iterator i = func-&gt;begin(), e = func-&gt;end(); i != e; ++i)
   1966   // <i>Print out the name of the basic block if it has one, and then the</i>
   1967   // <i>number of instructions that it contains</i>
   1968   errs() &lt;&lt; "Basic block (name=" &lt;&lt; i-&gt;getName() &lt;&lt; ") has "
   1969              &lt;&lt; i-&gt;size() &lt;&lt; " instructions.\n";
   1970 </pre>
   1971 </div>
   1972 
   1973 <p>Note that i can be used as if it were a pointer for the purposes of
   1974 invoking member functions of the <tt>BasicBlock</tt> class.  This is
   1975 because the indirection operator is overloaded for the iterator
   1976 classes.  In the above code, the expression <tt>i-&gt;size()</tt> is
   1977 exactly equivalent to <tt>(*i).size()</tt> just like you'd expect.</p>
   1978 
   1979 </div>
   1980 
   1981 <!-- _______________________________________________________________________ -->
   1982 <h4>
   1983   <a name="iterate_basicblock">Iterating over the </a><a
   1984   href="#Instruction"><tt>Instruction</tt></a>s in a <a
   1985   href="#BasicBlock"><tt>BasicBlock</tt></a>
   1986 </h4>
   1987 
   1988 <div>
   1989 
   1990 <p>Just like when dealing with <tt>BasicBlock</tt>s in <tt>Function</tt>s, it's
   1991 easy to iterate over the individual instructions that make up
   1992 <tt>BasicBlock</tt>s. Here's a code snippet that prints out each instruction in
   1993 a <tt>BasicBlock</tt>:</p>
   1994 
   1995 <div class="doc_code">
   1996 <pre>
   1997 // <i>blk is a pointer to a BasicBlock instance</i>
   1998 for (BasicBlock::iterator i = blk-&gt;begin(), e = blk-&gt;end(); i != e; ++i)
   1999    // <i>The next statement works since operator&lt;&lt;(ostream&amp;,...)</i>
   2000    // <i>is overloaded for Instruction&amp;</i>
   2001    errs() &lt;&lt; *i &lt;&lt; "\n";
   2002 </pre>
   2003 </div>
   2004 
   2005 <p>However, this isn't really the best way to print out the contents of a
   2006 <tt>BasicBlock</tt>!  Since the ostream operators are overloaded for virtually
   2007 anything you'll care about, you could have just invoked the print routine on the
   2008 basic block itself: <tt>errs() &lt;&lt; *blk &lt;&lt; "\n";</tt>.</p>
   2009 
   2010 </div>
   2011 
   2012 <!-- _______________________________________________________________________ -->
   2013 <h4>
   2014   <a name="iterate_institer">Iterating over the </a><a
   2015   href="#Instruction"><tt>Instruction</tt></a>s in a <a
   2016   href="#Function"><tt>Function</tt></a>
   2017 </h4>
   2018 
   2019 <div>
   2020 
   2021 <p>If you're finding that you commonly iterate over a <tt>Function</tt>'s
   2022 <tt>BasicBlock</tt>s and then that <tt>BasicBlock</tt>'s <tt>Instruction</tt>s,
   2023 <tt>InstIterator</tt> should be used instead. You'll need to include <a
   2024 href="/doxygen/InstIterator_8h-source.html"><tt>llvm/Support/InstIterator.h</tt></a>,
   2025 and then instantiate <tt>InstIterator</tt>s explicitly in your code.  Here's a
   2026 small example that shows how to dump all instructions in a function to the standard error stream:</p>
   2027 
   2028 <div class="doc_code">
   2029 <pre>
   2030 #include "<a href="/doxygen/InstIterator_8h-source.html">llvm/Support/InstIterator.h</a>"
   2031 
   2032 // <i>F is a pointer to a Function instance</i>
   2033 for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
   2034   errs() &lt;&lt; *I &lt;&lt; "\n";
   2035 </pre>
   2036 </div>
   2037 
   2038 <p>Easy, isn't it?  You can also use <tt>InstIterator</tt>s to fill a
   2039 work list with its initial contents.  For example, if you wanted to
   2040 initialize a work list to contain all instructions in a <tt>Function</tt>
   2041 F, all you would need to do is something like:</p>
   2042 
   2043 <div class="doc_code">
   2044 <pre>
   2045 std::set&lt;Instruction*&gt; worklist;
   2046 // or better yet, SmallPtrSet&lt;Instruction*, 64&gt; worklist;
   2047 
   2048 for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
   2049    worklist.insert(&amp;*I);
   2050 </pre>
   2051 </div>
   2052 
   2053 <p>The STL set <tt>worklist</tt> would now contain all instructions in the
   2054 <tt>Function</tt> pointed to by F.</p>
   2055 
   2056 </div>
   2057 
   2058 <!-- _______________________________________________________________________ -->
   2059 <h4>
   2060   <a name="iterate_convert">Turning an iterator into a class pointer (and
   2061   vice-versa)</a>
   2062 </h4>
   2063 
   2064 <div>
   2065 
   2066 <p>Sometimes, it'll be useful to grab a reference (or pointer) to a class
   2067 instance when all you've got at hand is an iterator.  Well, extracting
   2068 a reference or a pointer from an iterator is very straight-forward.
   2069 Assuming that <tt>i</tt> is a <tt>BasicBlock::iterator</tt> and <tt>j</tt>
   2070 is a <tt>BasicBlock::const_iterator</tt>:</p>
   2071 
   2072 <div class="doc_code">
   2073 <pre>
   2074 Instruction&amp; inst = *i;   // <i>Grab reference to instruction reference</i>
   2075 Instruction* pinst = &amp;*i; // <i>Grab pointer to instruction reference</i>
   2076 const Instruction&amp; inst = *j;
   2077 </pre>
   2078 </div>
   2079 
   2080 <p>However, the iterators you'll be working with in the LLVM framework are
   2081 special: they will automatically convert to a ptr-to-instance type whenever they
   2082 need to.  Instead of dereferencing the iterator and then taking the address of
   2083 the result, you can simply assign the iterator to the proper pointer type and
   2084 you get the dereference and address-of operation as a result of the assignment
   2085 (behind the scenes, this is a result of overloading casting mechanisms).  Thus
   2086 the second line of the last example,</p>
   2087 
   2088 <div class="doc_code">
   2089 <pre>
   2090 Instruction *pinst = &amp;*i;
   2091 </pre>
   2092 </div>
   2093 
   2094 <p>is semantically equivalent to</p>
   2095 
   2096 <div class="doc_code">
   2097 <pre>
   2098 Instruction *pinst = i;
   2099 </pre>
   2100 </div>
   2101 
   2102 <p>It's also possible to turn a class pointer into the corresponding iterator,
   2103 and this is a constant time operation (very efficient).  The following code
   2104 snippet illustrates use of the conversion constructors provided by LLVM
   2105 iterators.  By using these, you can explicitly grab the iterator of something
   2106 without actually obtaining it via iteration over some structure:</p>
   2107 
   2108 <div class="doc_code">
   2109 <pre>
   2110 void printNextInstruction(Instruction* inst) {
   2111   BasicBlock::iterator it(inst);
   2112   ++it; // <i>After this line, it refers to the instruction after *inst</i>
   2113   if (it != inst-&gt;getParent()-&gt;end()) errs() &lt;&lt; *it &lt;&lt; "\n";
   2114 }
   2115 </pre>
   2116 </div>
   2117 
   2118 <p>Unfortunately, these implicit conversions come at a cost; they prevent
   2119 these iterators from conforming to standard iterator conventions, and thus
   2120 from being usable with standard algorithms and containers. For example, they
   2121 prevent the following code, where <tt>B</tt> is a <tt>BasicBlock</tt>,
   2122 from compiling:</p>
   2123 
   2124 <div class="doc_code">
   2125 <pre>
   2126   llvm::SmallVector&lt;llvm::Instruction *, 16&gt;(B-&gt;begin(), B-&gt;end());
   2127 </pre>
   2128 </div>
   2129 
   2130 <p>Because of this, these implicit conversions may be removed some day,
   2131 and <tt>operator*</tt> changed to return a pointer instead of a reference.</p>
   2132 
   2133 </div>
   2134 
   2135 <!--_______________________________________________________________________-->
   2136 <h4>
   2137   <a name="iterate_complex">Finding call sites: a slightly more complex
   2138   example</a>
   2139 </h4>
   2140 
   2141 <div>
   2142 
   2143 <p>Say that you're writing a FunctionPass and would like to count all the
   2144 locations in the entire module (that is, across every <tt>Function</tt>) where a
   2145 certain function (i.e., some <tt>Function</tt>*) is already in scope.  As you'll
   2146 learn later, you may want to use an <tt>InstVisitor</tt> to accomplish this in a
   2147 much more straight-forward manner, but this example will allow us to explore how
   2148 you'd do it if you didn't have <tt>InstVisitor</tt> around. In pseudo-code, this
   2149 is what we want to do:</p>
   2150 
   2151 <div class="doc_code">
   2152 <pre>
   2153 initialize callCounter to zero
   2154 for each Function f in the Module
   2155   for each BasicBlock b in f
   2156     for each Instruction i in b
   2157       if (i is a CallInst and calls the given function)
   2158         increment callCounter
   2159 </pre>
   2160 </div>
   2161 
   2162 <p>And the actual code is (remember, because we're writing a
   2163 <tt>FunctionPass</tt>, our <tt>FunctionPass</tt>-derived class simply has to
   2164 override the <tt>runOnFunction</tt> method):</p>
   2165 
   2166 <div class="doc_code">
   2167 <pre>
   2168 Function* targetFunc = ...;
   2169 
   2170 class OurFunctionPass : public FunctionPass {
   2171   public:
   2172     OurFunctionPass(): callCounter(0) { }
   2173 
   2174     virtual runOnFunction(Function&amp; F) {
   2175       for (Function::iterator b = F.begin(), be = F.end(); b != be; ++b) {
   2176         for (BasicBlock::iterator i = b-&gt;begin(), ie = b-&gt;end(); i != ie; ++i) {
   2177           if (<a href="#CallInst">CallInst</a>* callInst = <a href="#isa">dyn_cast</a>&lt;<a
   2178  href="#CallInst">CallInst</a>&gt;(&amp;*i)) {
   2179             // <i>We know we've encountered a call instruction, so we</i>
   2180             // <i>need to determine if it's a call to the</i>
   2181             // <i>function pointed to by targetFunc or not.</i>
   2182             if (callInst-&gt;getCalledFunction() == targetFunc)
   2183               ++callCounter;
   2184           }
   2185         }
   2186       }
   2187     }
   2188 
   2189   private:
   2190     unsigned callCounter;
   2191 };
   2192 </pre>
   2193 </div>
   2194 
   2195 </div>
   2196 
   2197 <!--_______________________________________________________________________-->
   2198 <h4>
   2199   <a name="calls_and_invokes">Treating calls and invokes the same way</a>
   2200 </h4>
   2201 
   2202 <div>
   2203 
   2204 <p>You may have noticed that the previous example was a bit oversimplified in
   2205 that it did not deal with call sites generated by 'invoke' instructions. In
   2206 this, and in other situations, you may find that you want to treat
   2207 <tt>CallInst</tt>s and <tt>InvokeInst</tt>s the same way, even though their
   2208 most-specific common base class is <tt>Instruction</tt>, which includes lots of
   2209 less closely-related things. For these cases, LLVM provides a handy wrapper
   2210 class called <a
   2211 href="http://llvm.org/doxygen/classllvm_1_1CallSite.html"><tt>CallSite</tt></a>.
   2212 It is essentially a wrapper around an <tt>Instruction</tt> pointer, with some
   2213 methods that provide functionality common to <tt>CallInst</tt>s and
   2214 <tt>InvokeInst</tt>s.</p>
   2215 
   2216 <p>This class has "value semantics": it should be passed by value, not by
   2217 reference and it should not be dynamically allocated or deallocated using
   2218 <tt>operator new</tt> or <tt>operator delete</tt>. It is efficiently copyable,
   2219 assignable and constructable, with a cost equivalent to that of a bare pointer.
   2220 If you look at its definition, it has only a single pointer member.</p>
   2221 
   2222 </div>
   2223 
   2224 <!--_______________________________________________________________________-->
   2225 <h4>
   2226   <a name="iterate_chains">Iterating over def-use &amp; use-def chains</a>
   2227 </h4>
   2228 
   2229 <div>
   2230 
   2231 <p>Frequently, we might have an instance of the <a
   2232 href="/doxygen/classllvm_1_1Value.html">Value Class</a> and we want to
   2233 determine which <tt>User</tt>s use the <tt>Value</tt>.  The list of all
   2234 <tt>User</tt>s of a particular <tt>Value</tt> is called a <i>def-use</i> chain.
   2235 For example, let's say we have a <tt>Function*</tt> named <tt>F</tt> to a
   2236 particular function <tt>foo</tt>. Finding all of the instructions that
   2237 <i>use</i> <tt>foo</tt> is as simple as iterating over the <i>def-use</i> chain
   2238 of <tt>F</tt>:</p>
   2239 
   2240 <div class="doc_code">
   2241 <pre>
   2242 Function *F = ...;
   2243 
   2244 for (Value::use_iterator i = F-&gt;use_begin(), e = F-&gt;use_end(); i != e; ++i)
   2245   if (Instruction *Inst = dyn_cast&lt;Instruction&gt;(*i)) {
   2246     errs() &lt;&lt; "F is used in instruction:\n";
   2247     errs() &lt;&lt; *Inst &lt;&lt; "\n";
   2248   }
   2249 </pre>
   2250 </div>
   2251 
   2252 <p>Note that dereferencing a <tt>Value::use_iterator</tt> is not a very cheap
   2253 operation. Instead of performing <tt>*i</tt> above several times, consider
   2254 doing it only once in the loop body and reusing its result.</p>
   2255 
   2256 <p>Alternatively, it's common to have an instance of the <a
   2257 href="/doxygen/classllvm_1_1User.html">User Class</a> and need to know what
   2258 <tt>Value</tt>s are used by it.  The list of all <tt>Value</tt>s used by a
   2259 <tt>User</tt> is known as a <i>use-def</i> chain.  Instances of class
   2260 <tt>Instruction</tt> are common <tt>User</tt>s, so we might want to iterate over
   2261 all of the values that a particular instruction uses (that is, the operands of
   2262 the particular <tt>Instruction</tt>):</p>
   2263 
   2264 <div class="doc_code">
   2265 <pre>
   2266 Instruction *pi = ...;
   2267 
   2268 for (User::op_iterator i = pi-&gt;op_begin(), e = pi-&gt;op_end(); i != e; ++i) {
   2269   Value *v = *i;
   2270   // <i>...</i>
   2271 }
   2272 </pre>
   2273 </div>
   2274 
   2275 <p>Declaring objects as <tt>const</tt> is an important tool for enforcing
   2276 mutation-free algorithms (such as analyses, etc.). For this purpose, the above
   2277 iterators come in constant flavors as <tt>Value::const_use_iterator</tt>
   2278 and <tt>User::const_op_iterator</tt>.  They automatically arise when
   2279 calling <tt>use/op_begin()</tt> on <tt>const Value*</tt>s or
   2280 <tt>const User*</tt>s respectively.  Upon dereferencing, they return
   2281 <tt>const Use*</tt>s. Otherwise the above patterns remain unchanged.</p>
   2282 
   2283 </div>
   2284 
   2285 <!--_______________________________________________________________________-->
   2286 <h4>
   2287   <a name="iterate_preds">Iterating over predecessors &amp;
   2288 successors of blocks</a>
   2289 </h4>
   2290 
   2291 <div>
   2292 
   2293 <p>Iterating over the predecessors and successors of a block is quite easy
   2294 with the routines defined in <tt>"llvm/Support/CFG.h"</tt>.  Just use code like
   2295 this to iterate over all predecessors of BB:</p>
   2296 
   2297 <div class="doc_code">
   2298 <pre>
   2299 #include "llvm/Support/CFG.h"
   2300 BasicBlock *BB = ...;
   2301 
   2302 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
   2303   BasicBlock *Pred = *PI;
   2304   // <i>...</i>
   2305 }
   2306 </pre>
   2307 </div>
   2308 
   2309 <p>Similarly, to iterate over successors use
   2310 succ_iterator/succ_begin/succ_end.</p>
   2311 
   2312 </div>
   2313 
   2314 </div>
   2315 
   2316 <!-- ======================================================================= -->
   2317 <h3>
   2318   <a name="simplechanges">Making simple changes</a>
   2319 </h3>
   2320 
   2321 <div>
   2322 
   2323 <p>There are some primitive transformation operations present in the LLVM
   2324 infrastructure that are worth knowing about.  When performing
   2325 transformations, it's fairly common to manipulate the contents of basic
   2326 blocks. This section describes some of the common methods for doing so
   2327 and gives example code.</p>
   2328 
   2329 <!--_______________________________________________________________________-->
   2330 <h4>
   2331   <a name="schanges_creating">Creating and inserting new
   2332   <tt>Instruction</tt>s</a>
   2333 </h4>
   2334 
   2335 <div>
   2336 
   2337 <p><i>Instantiating Instructions</i></p>
   2338 
   2339 <p>Creation of <tt>Instruction</tt>s is straight-forward: simply call the
   2340 constructor for the kind of instruction to instantiate and provide the necessary
   2341 parameters. For example, an <tt>AllocaInst</tt> only <i>requires</i> a
   2342 (const-ptr-to) <tt>Type</tt>. Thus:</p> 
   2343 
   2344 <div class="doc_code">
   2345 <pre>
   2346 AllocaInst* ai = new AllocaInst(Type::Int32Ty);
   2347 </pre>
   2348 </div>
   2349 
   2350 <p>will create an <tt>AllocaInst</tt> instance that represents the allocation of
   2351 one integer in the current stack frame, at run time. Each <tt>Instruction</tt>
   2352 subclass is likely to have varying default parameters which change the semantics
   2353 of the instruction, so refer to the <a
   2354 href="/doxygen/classllvm_1_1Instruction.html">doxygen documentation for the subclass of
   2355 Instruction</a> that you're interested in instantiating.</p>
   2356 
   2357 <p><i>Naming values</i></p>
   2358 
   2359 <p>It is very useful to name the values of instructions when you're able to, as
   2360 this facilitates the debugging of your transformations.  If you end up looking
   2361 at generated LLVM machine code, you definitely want to have logical names
   2362 associated with the results of instructions!  By supplying a value for the
   2363 <tt>Name</tt> (default) parameter of the <tt>Instruction</tt> constructor, you
   2364 associate a logical name with the result of the instruction's execution at
   2365 run time.  For example, say that I'm writing a transformation that dynamically
   2366 allocates space for an integer on the stack, and that integer is going to be
   2367 used as some kind of index by some other code.  To accomplish this, I place an
   2368 <tt>AllocaInst</tt> at the first point in the first <tt>BasicBlock</tt> of some
   2369 <tt>Function</tt>, and I'm intending to use it within the same
   2370 <tt>Function</tt>. I might do:</p>
   2371 
   2372 <div class="doc_code">
   2373 <pre>
   2374 AllocaInst* pa = new AllocaInst(Type::Int32Ty, 0, "indexLoc");
   2375 </pre>
   2376 </div>
   2377 
   2378 <p>where <tt>indexLoc</tt> is now the logical name of the instruction's
   2379 execution value, which is a pointer to an integer on the run time stack.</p>
   2380 
   2381 <p><i>Inserting instructions</i></p>
   2382 
   2383 <p>There are essentially two ways to insert an <tt>Instruction</tt>
   2384 into an existing sequence of instructions that form a <tt>BasicBlock</tt>:</p>
   2385 
   2386 <ul>
   2387   <li>Insertion into an explicit instruction list
   2388 
   2389     <p>Given a <tt>BasicBlock* pb</tt>, an <tt>Instruction* pi</tt> within that
   2390     <tt>BasicBlock</tt>, and a newly-created instruction we wish to insert
   2391     before <tt>*pi</tt>, we do the following: </p>
   2392 
   2393 <div class="doc_code">
   2394 <pre>
   2395 BasicBlock *pb = ...;
   2396 Instruction *pi = ...;
   2397 Instruction *newInst = new Instruction(...);
   2398 
   2399 pb-&gt;getInstList().insert(pi, newInst); // <i>Inserts newInst before pi in pb</i>
   2400 </pre>
   2401 </div>
   2402 
   2403     <p>Appending to the end of a <tt>BasicBlock</tt> is so common that
   2404     the <tt>Instruction</tt> class and <tt>Instruction</tt>-derived
   2405     classes provide constructors which take a pointer to a
   2406     <tt>BasicBlock</tt> to be appended to. For example, code that
   2407     looked like: </p>
   2408 
   2409 <div class="doc_code">
   2410 <pre>
   2411 BasicBlock *pb = ...;
   2412 Instruction *newInst = new Instruction(...);
   2413 
   2414 pb-&gt;getInstList().push_back(newInst); // <i>Appends newInst to pb</i>
   2415 </pre>
   2416 </div>
   2417 
   2418     <p>becomes: </p>
   2419 
   2420 <div class="doc_code">
   2421 <pre>
   2422 BasicBlock *pb = ...;
   2423 Instruction *newInst = new Instruction(..., pb);
   2424 </pre>
   2425 </div>
   2426 
   2427     <p>which is much cleaner, especially if you are creating
   2428     long instruction streams.</p></li>
   2429 
   2430   <li>Insertion into an implicit instruction list
   2431 
   2432     <p><tt>Instruction</tt> instances that are already in <tt>BasicBlock</tt>s
   2433     are implicitly associated with an existing instruction list: the instruction
   2434     list of the enclosing basic block. Thus, we could have accomplished the same
   2435     thing as the above code without being given a <tt>BasicBlock</tt> by doing:
   2436     </p>
   2437 
   2438 <div class="doc_code">
   2439 <pre>
   2440 Instruction *pi = ...;
   2441 Instruction *newInst = new Instruction(...);
   2442 
   2443 pi-&gt;getParent()-&gt;getInstList().insert(pi, newInst);
   2444 </pre>
   2445 </div>
   2446 
   2447     <p>In fact, this sequence of steps occurs so frequently that the
   2448     <tt>Instruction</tt> class and <tt>Instruction</tt>-derived classes provide
   2449     constructors which take (as a default parameter) a pointer to an
   2450     <tt>Instruction</tt> which the newly-created <tt>Instruction</tt> should
   2451     precede.  That is, <tt>Instruction</tt> constructors are capable of
   2452     inserting the newly-created instance into the <tt>BasicBlock</tt> of a
   2453     provided instruction, immediately before that instruction.  Using an
   2454     <tt>Instruction</tt> constructor with a <tt>insertBefore</tt> (default)
   2455     parameter, the above code becomes:</p>
   2456 
   2457 <div class="doc_code">
   2458 <pre>
   2459 Instruction* pi = ...;
   2460 Instruction* newInst = new Instruction(..., pi);
   2461 </pre>
   2462 </div>
   2463 
   2464     <p>which is much cleaner, especially if you're creating a lot of
   2465     instructions and adding them to <tt>BasicBlock</tt>s.</p></li>
   2466 </ul>
   2467 
   2468 </div>
   2469 
   2470 <!--_______________________________________________________________________-->
   2471 <h4>
   2472   <a name="schanges_deleting">Deleting <tt>Instruction</tt>s</a>
   2473 </h4>
   2474 
   2475 <div>
   2476 
   2477 <p>Deleting an instruction from an existing sequence of instructions that form a
   2478 <a href="#BasicBlock"><tt>BasicBlock</tt></a> is very straight-forward: just
   2479 call the instruction's eraseFromParent() method.  For example:</p>
   2480 
   2481 <div class="doc_code">
   2482 <pre>
   2483 <a href="#Instruction">Instruction</a> *I = .. ;
   2484 I-&gt;eraseFromParent();
   2485 </pre>
   2486 </div>
   2487 
   2488 <p>This unlinks the instruction from its containing basic block and deletes 
   2489 it.  If you'd just like to unlink the instruction from its containing basic
   2490 block but not delete it, you can use the <tt>removeFromParent()</tt> method.</p>
   2491 
   2492 </div>
   2493 
   2494 <!--_______________________________________________________________________-->
   2495 <h4>
   2496   <a name="schanges_replacing">Replacing an <tt>Instruction</tt> with another
   2497   <tt>Value</tt></a>
   2498 </h4>
   2499 
   2500 <div>
   2501 
   2502 <p><i>Replacing individual instructions</i></p>
   2503 
   2504 <p>Including "<a href="/doxygen/BasicBlockUtils_8h-source.html">llvm/Transforms/Utils/BasicBlockUtils.h</a>"
   2505 permits use of two very useful replace functions: <tt>ReplaceInstWithValue</tt>
   2506 and <tt>ReplaceInstWithInst</tt>.</p>
   2507 
   2508 <h5><a name="schanges_replace_funcs">Replace functions</a></h5>
   2509 
   2510 <ul>
   2511   <li><tt>ReplaceInstWithValue</tt>
   2512 
   2513     <p>This function replaces all uses of a given instruction with a value,
   2514     and then removes the original instruction. The following example
   2515     illustrates the replacement of the result of a particular
   2516     <tt>AllocaInst</tt> that allocates memory for a single integer with a null
   2517     pointer to an integer.</p>
   2518 
   2519 <div class="doc_code">
   2520 <pre>
   2521 AllocaInst* instToReplace = ...;
   2522 BasicBlock::iterator ii(instToReplace);
   2523 
   2524 ReplaceInstWithValue(instToReplace-&gt;getParent()-&gt;getInstList(), ii,
   2525                      Constant::getNullValue(PointerType::getUnqual(Type::Int32Ty)));
   2526 </pre></div></li>
   2527 
   2528   <li><tt>ReplaceInstWithInst</tt> 
   2529 
   2530     <p>This function replaces a particular instruction with another
   2531     instruction, inserting the new instruction into the basic block at the
   2532     location where the old instruction was, and replacing any uses of the old
   2533     instruction with the new instruction. The following example illustrates
   2534     the replacement of one <tt>AllocaInst</tt> with another.</p>
   2535 
   2536 <div class="doc_code">
   2537 <pre>
   2538 AllocaInst* instToReplace = ...;
   2539 BasicBlock::iterator ii(instToReplace);
   2540 
   2541 ReplaceInstWithInst(instToReplace-&gt;getParent()-&gt;getInstList(), ii,
   2542                     new AllocaInst(Type::Int32Ty, 0, "ptrToReplacedInt"));
   2543 </pre></div></li>
   2544 </ul>
   2545 
   2546 <p><i>Replacing multiple uses of <tt>User</tt>s and <tt>Value</tt>s</i></p>
   2547 
   2548 <p>You can use <tt>Value::replaceAllUsesWith</tt> and
   2549 <tt>User::replaceUsesOfWith</tt> to change more than one use at a time.  See the
   2550 doxygen documentation for the <a href="/doxygen/classllvm_1_1Value.html">Value Class</a>
   2551 and <a href="/doxygen/classllvm_1_1User.html">User Class</a>, respectively, for more
   2552 information.</p>
   2553 
   2554 <!-- Value::replaceAllUsesWith User::replaceUsesOfWith Point out:
   2555 include/llvm/Transforms/Utils/ especially BasicBlockUtils.h with:
   2556 ReplaceInstWithValue, ReplaceInstWithInst -->
   2557 
   2558 </div>
   2559 
   2560 <!--_______________________________________________________________________-->
   2561 <h4>
   2562   <a name="schanges_deletingGV">Deleting <tt>GlobalVariable</tt>s</a>
   2563 </h4>
   2564 
   2565 <div>
   2566 
   2567 <p>Deleting a global variable from a module is just as easy as deleting an 
   2568 Instruction. First, you must have a pointer to the global variable that you wish
   2569  to delete.  You use this pointer to erase it from its parent, the module.
   2570  For example:</p>
   2571 
   2572 <div class="doc_code">
   2573 <pre>
   2574 <a href="#GlobalVariable">GlobalVariable</a> *GV = .. ;
   2575 
   2576 GV-&gt;eraseFromParent();
   2577 </pre>
   2578 </div>
   2579 
   2580 </div>
   2581 
   2582 </div>
   2583 
   2584 <!-- ======================================================================= -->
   2585 <h3>
   2586   <a name="create_types">How to Create Types</a>
   2587 </h3>
   2588 
   2589 <div>
   2590 
   2591 <p>In generating IR, you may need some complex types.  If you know these types
   2592 statically, you can use <tt>TypeBuilder&lt;...&gt;::get()</tt>, defined
   2593 in <tt>llvm/Support/TypeBuilder.h</tt>, to retrieve them.  <tt>TypeBuilder</tt>
   2594 has two forms depending on whether you're building types for cross-compilation
   2595 or native library use.  <tt>TypeBuilder&lt;T, true&gt;</tt> requires
   2596 that <tt>T</tt> be independent of the host environment, meaning that it's built
   2597 out of types from
   2598 the <a href="/doxygen/namespacellvm_1_1types.html"><tt>llvm::types</tt></a>
   2599 namespace and pointers, functions, arrays, etc. built of
   2600 those.  <tt>TypeBuilder&lt;T, false&gt;</tt> additionally allows native C types
   2601 whose size may depend on the host compiler.  For example,</p>
   2602 
   2603 <div class="doc_code">
   2604 <pre>
   2605 FunctionType *ft = TypeBuilder&lt;types::i&lt;8&gt;(types::i&lt;32&gt;*), true&gt;::get();
   2606 </pre>
   2607 </div>
   2608 
   2609 <p>is easier to read and write than the equivalent</p>
   2610 
   2611 <div class="doc_code">
   2612 <pre>
   2613 std::vector&lt;const Type*&gt; params;
   2614 params.push_back(PointerType::getUnqual(Type::Int32Ty));
   2615 FunctionType *ft = FunctionType::get(Type::Int8Ty, params, false);
   2616 </pre>
   2617 </div>
   2618 
   2619 <p>See the <a href="/doxygen/TypeBuilder_8h-source.html#l00001">class
   2620 comment</a> for more details.</p>
   2621 
   2622 </div>
   2623 
   2624 </div>
   2625 
   2626 <!-- *********************************************************************** -->
   2627 <h2>
   2628   <a name="threading">Threads and LLVM</a>
   2629 </h2>
   2630 <!-- *********************************************************************** -->
   2631 
   2632 <div>
   2633 <p>
   2634 This section describes the interaction of the LLVM APIs with multithreading,
   2635 both on the part of client applications, and in the JIT, in the hosted
   2636 application.
   2637 </p>
   2638 
   2639 <p>
   2640 Note that LLVM's support for multithreading is still relatively young.  Up 
   2641 through version 2.5, the execution of threaded hosted applications was
   2642 supported, but not threaded client access to the APIs.  While this use case is
   2643 now supported, clients <em>must</em> adhere to the guidelines specified below to
   2644 ensure proper operation in multithreaded mode.
   2645 </p>
   2646 
   2647 <p>
   2648 Note that, on Unix-like platforms, LLVM requires the presence of GCC's atomic
   2649 intrinsics in order to support threaded operation.  If you need a
   2650 multithreading-capable LLVM on a platform without a suitably modern system
   2650 multhreading-capable LLVM on a platform without a suitably modern system
   2651 compiler, consider compiling LLVM and LLVM-GCC in single-threaded mode, and 
   2652 using the resultant compiler to build a copy of LLVM with multithreading
   2653 support.
   2654 </p>
   2655 
   2656 <!-- ======================================================================= -->
   2657 <h3>
   2658   <a name="startmultithreaded">Entering and Exiting Multithreaded Mode</a>
   2659 </h3>
   2660 
   2661 <div>
   2662 
   2663 <p>
   2664 In order to properly protect its internal data structures while avoiding 
   2665 excessive locking overhead in the single-threaded case, LLVM must initialize
   2665 excessive locking overhead in the single-threaded case, the LLVM must intialize
   2666 certain data structures necessary to provide guards around its internals.  To do
   2667 so, the client program must invoke <tt>llvm_start_multithreaded()</tt> before
   2668 making any concurrent LLVM API calls.  To subsequently tear down these
   2669 structures, use the <tt>llvm_stop_multithreaded()</tt> call.  You can also use
   2670 the <tt>llvm_is_multithreaded()</tt> call to check the status of multithreaded
   2671 mode.
   2672 </p>
   2673 
   2674 <p>
   2675 Note that both of these calls must be made <em>in isolation</em>.  That is to
   2676 say that no other LLVM API calls may be executing at any time during the 
   2677 execution of <tt>llvm_start_multithreaded()</tt> or
   2678 <tt>llvm_stop_multithreaded()</tt>.  It is the client's responsibility to enforce this isolation.
   2679 </p>
   2680 
   2681 <p>
   2682 The return value of <tt>llvm_start_multithreaded()</tt> indicates the success or
   2683 failure of the initialization.  Failure typically indicates that your copy of
   2684 LLVM was built without multithreading support, typically because GCC atomic
   2685 intrinsics were not found in your system compiler.  In this case, the LLVM API
   2686 will not be safe for concurrent calls.  However, it <em>will</em> be safe for
   2687 hosting threaded applications in the JIT, though <a href="#jitthreading">care
   2688 must be taken</a> to ensure that side exits and the like do not accidentally
   2689 result in concurrent LLVM API calls.
   2690 </p>
   2691 </div>
   2692 
   2693 <!-- ======================================================================= -->
   2694 <h3>
   2695   <a name="shutdown">Ending Execution with <tt>llvm_shutdown()</tt></a>
   2696 </h3>
   2697 
   2698 <div>
   2699 <p>
   2700 When you are done using the LLVM APIs, you should call <tt>llvm_shutdown()</tt>
   2701 to deallocate memory used for internal structures.  This will also invoke 
   2702 <tt>llvm_stop_multithreaded()</tt> if LLVM is operating in multithreaded mode.
   2703 As such, <tt>llvm_shutdown()</tt> requires the same isolation guarantees as
   2704 <tt>llvm_stop_multithreaded()</tt>.
   2705 </p>
   2706 
   2707 <p>
   2708 Note that, if you use scope-based shutdown, you can use the
   2709 <tt>llvm_shutdown_obj</tt> class, which calls <tt>llvm_shutdown()</tt> in its
   2710 destructor.</p>
   2711 </div>
   2712 
   2713 <!-- ======================================================================= -->
   2714 <h3>
   2715   <a name="managedstatic">Lazy Initialization with <tt>ManagedStatic</tt></a>
   2716 </h3>
   2717 
   2718 <div>
   2719 <p>
   2720 <tt>ManagedStatic</tt> is a utility class in LLVM used to implement static
   2721 initialization of static resources, such as the global type tables.  Before the
   2722 invocation of <tt>llvm_shutdown()</tt>, it implements a simple lazy 
   2723 initialization scheme.  Once <tt>llvm_start_multithreaded()</tt> returns,
   2724 however, it uses double-checked locking to implement thread-safe lazy
   2725 initialization.
   2726 </p>
   2727 
   2728 <p>
   2729 Note that, because no other threads are allowed to issue LLVM API calls before
   2730 <tt>llvm_start_multithreaded()</tt> returns, it is possible to have 
   2731 <tt>ManagedStatic</tt>s of <tt>llvm::sys::Mutex</tt>s.
   2732 </p>
   2733 
   2734 <p>
   2735 The <tt>llvm_acquire_global_lock()</tt> and <tt>llvm_release_global_lock()</tt> 
   2736 APIs provide access to the global lock used to implement the double-checked
   2737 locking for lazy initialization.  These should only be used internally to LLVM,
   2738 and only if you know what you're doing!
   2739 </p>
   2740 </div>
   2741 
   2742 <!-- ======================================================================= -->
   2743 <h3>
   2744   <a name="llvmcontext">Achieving Isolation with <tt>LLVMContext</tt></a>
   2745 </h3>
   2746 
   2747 <div>
   2748 <p>
   2749 <tt>LLVMContext</tt> is an opaque class in the LLVM API which clients can use
   2750 to operate multiple, isolated instances of LLVM concurrently within the same
   2751 address space.  For instance, in a hypothetical compile-server, the compilation
   2752 of an individual translation unit is conceptually independent from all the 
   2753 others, and it would be desirable to be able to compile incoming translation 
   2754 units concurrently on independent server threads.  Fortunately, 
   2755 <tt>LLVMContext</tt> exists to enable just this kind of scenario!
   2756 </p>
   2757 
   2758 <p>
   2759 Conceptually, <tt>LLVMContext</tt> provides isolation.  Every LLVM entity 
   2760 (<tt>Module</tt>s, <tt>Value</tt>s, <tt>Type</tt>s, <tt>Constant</tt>s, etc.)
   2761 in LLVM's in-memory IR belongs to an <tt>LLVMContext</tt>.  Entities in 
   2762 different contexts <em>cannot</em> interact with each other: <tt>Module</tt>s in
   2763 different contexts cannot be linked together, <tt>Function</tt>s cannot be added
   2764 to <tt>Module</tt>s in different contexts, etc.  What this means is that it is
   2765 safe to compile on multiple threads simultaneously, as long as no two threads
   2766 operate on entities within the same context.
   2767 </p>
   2768 
   2769 <p>
   2770 In practice, very few places in the API require the explicit specification of a
   2771 <tt>LLVMContext</tt>, other than the <tt>Type</tt> creation/lookup APIs.
   2772 Because every <tt>Type</tt> carries a reference to its owning context, most
   2773 other entities can determine what context they belong to by looking at their
   2774 own <tt>Type</tt>.  If you are adding new entities to LLVM IR, please try to
   2775 maintain this interface design.
   2776 </p>
   2777 
   2778 <p>
   2779 For clients that do <em>not</em> require the benefits of isolation, LLVM 
   2780 provides a convenience API <tt>getGlobalContext()</tt>.  This returns a global,
   2781 lazily initialized <tt>LLVMContext</tt> that may be used in situations where
   2782 isolation is not a concern.
   2783 </p>
   2784 </div>
   2785 
   2786 <!-- ======================================================================= -->
   2787 <h3>
   2788   <a name="jitthreading">Threads and the JIT</a>
   2789 </h3>
   2790 
   2791 <div>
   2792 <p>
   2793 LLVM's "eager" JIT compiler is safe to use in threaded programs.  Multiple
   2794 threads can call <tt>ExecutionEngine::getPointerToFunction()</tt> or
   2795 <tt>ExecutionEngine::runFunction()</tt> concurrently, and multiple threads can
   2796 run code output by the JIT concurrently.  The user must still ensure that only
   2797 one thread accesses IR in a given <tt>LLVMContext</tt> while another thread
   2798 might be modifying it.  One way to do that is to always hold the JIT lock while
   2799 accessing IR outside the JIT (the JIT <em>modifies</em> the IR by adding
   2800 <tt>CallbackVH</tt>s).  Another way is to only
   2801 call <tt>getPointerToFunction()</tt> from the <tt>LLVMContext</tt>'s thread.
   2802 </p>
   2803 
   2804 <p>When the JIT is configured to compile lazily (using
   2805 <tt>ExecutionEngine::DisableLazyCompilation(false)</tt>), there is currently a
   2806 <a href="http://llvm.org/bugs/show_bug.cgi?id=5184">race condition</a> in
   2807 updating call sites after a function is lazily-jitted.  It's still possible to
   2808 use the lazy JIT in a threaded program if you ensure that only one thread at a
   2809 time can call any particular lazy stub and that the JIT lock guards any IR
   2810 access, but we suggest using only the eager JIT in threaded programs.
   2811 </p>
   2812 </div>
   2813 
   2814 </div>
   2815 
   2816 <!-- *********************************************************************** -->
   2817 <h2>
   2818   <a name="advanced">Advanced Topics</a>
   2819 </h2>
   2820 <!-- *********************************************************************** -->
   2821 
   2822 <div>
   2823 <p>
   2824 This section describes some of the advanced or obscure APIs that most clients
   2825 do not need to be aware of.  These APIs tend to manage the inner workings of the
   2826 LLVM system, and only need to be accessed in unusual circumstances.
   2827 </p>
   2828 
   2829   
   2830 <!-- ======================================================================= -->
   2831 <h3>
   2832   <a name="SymbolTable">The <tt>ValueSymbolTable</tt> class</a>
   2833 </h3>
   2834 
   2835 <div>
   2836 <p>The <tt><a href="http://llvm.org/doxygen/classllvm_1_1ValueSymbolTable.html">
   2837 ValueSymbolTable</a></tt> class provides a symbol table that the <a
   2838 href="#Function"><tt>Function</tt></a> and <a href="#Module">
   2839 <tt>Module</tt></a> classes use for naming value definitions. The symbol table
   2840 can provide a name for any <a href="#Value"><tt>Value</tt></a>. 
   2841 </p>
   2842 
   2843 <p>Note that the <tt>SymbolTable</tt> class should not be directly accessed 
   2844 by most clients.  It should only be used when iteration over the symbol table 
   2845 names themselves is required, which is very special purpose.  Note that not
   2846 all LLVM
   2847 <tt><a href="#Value">Value</a></tt>s have names, and those without names (i.e. they have
   2848 an empty name) do not exist in the symbol table.
   2849 </p>
   2850 
   2851 <p>Symbol tables support iteration over the values in the symbol
   2852 table with <tt>begin/end/iterator</tt> and support querying to see if a
   2853 specific name is in the symbol table (with <tt>lookup</tt>).  The
   2854 <tt>ValueSymbolTable</tt> class exposes no public mutator methods; instead,
   2855 simply call <tt>setName</tt> on a value, which will autoinsert it into the
   2856 appropriate symbol table.</p>
   2857 
   2858 </div>
   2859 
   2860 
   2861 
   2862 <!-- ======================================================================= -->
   2863 <h3>
   2864   <a name="UserLayout">The <tt>User</tt> and owned <tt>Use</tt> classes' memory layout</a>
   2865 </h3>
   2866 
   2867 <div>
   2868 <p>The <tt><a href="http://llvm.org/doxygen/classllvm_1_1User.html">
   2869 User</a></tt> class provides a basis for expressing the ownership of <tt>User</tt>
   2870 towards other <tt><a href="http://llvm.org/doxygen/classllvm_1_1Value.html">
   2871 Value</a></tt>s. The <tt><a href="http://llvm.org/doxygen/classllvm_1_1Use.html">
   2872 Use</a></tt> helper class is employed to do the bookkeeping and to facilitate <i>O(1)</i>
   2873 addition and removal.</p>
   2874 
   2875 <!-- ______________________________________________________________________ -->
   2876 <h4>
   2877   <a name="Use2User">
   2878     Interaction and relationship between <tt>User</tt> and <tt>Use</tt> objects
   2879   </a>
   2880 </h4>
   2881 
   2882 <div>
   2883 <p>
   2884 A subclass of <tt>User</tt> can choose between incorporating its <tt>Use</tt> objects
   2885 or referring to them out-of-line by means of a pointer. A mixed variant
   2886 (some <tt>Use</tt>s inline, others hung off) is impractical and breaks the invariant
   2887 that the <tt>Use</tt> objects belonging to the same <tt>User</tt> form a contiguous array.
   2888 </p>
   2889 
   2890 <p>
   2891 We have 2 different layouts in the <tt>User</tt> (sub)classes:
   2892 <ul>
   2893 <li><p>Layout a)
   2894 The <tt>Use</tt> object(s) are inside (resp. at fixed offset) of the <tt>User</tt>
   2895 object and there are a fixed number of them.</p>
   2896 
   2897 <li><p>Layout b)
   2898 The <tt>Use</tt> object(s) are referenced by a pointer to an
   2899 array from the <tt>User</tt> object and there may be a variable
   2900 number of them.</p>
   2901 </ul>
   2902 <p>
   2903 As of v2.4 each layout still possesses a direct pointer to the
   2904 start of the array of <tt>Use</tt>s. Though not mandatory for layout a),
   2905 we stick to this redundancy for the sake of simplicity.
   2906 The <tt>User</tt> object also stores the number of <tt>Use</tt> objects it
   2907 has. (Theoretically this information can also be calculated
   2908 given the scheme presented below.)</p>
   2909 <p>
   2910 Special forms of allocation operators (<tt>operator new</tt>)
   2911 enforce the following memory layouts:</p>
   2912 
   2913 <ul>
   2914 <li><p>Layout a) is modelled by prepending the <tt>Use[]</tt> array to the <tt>User</tt> object.</p>
   2915 
   2916 <pre>
   2917 ...---.---.---.---.-------...
   2918   | P | P | P | P | User
   2919 '''---'---'---'---'-------'''
   2920 </pre>
   2921 
   2922 <li><p>Layout b) is modelled by pointing at the <tt>Use[]</tt> array.</p>
   2923 <pre>
   2924 .-------...
   2925 | User
   2926 '-------'''
   2927     |
   2928     v
   2929     .---.---.---.---...
   2930     | P | P | P | P |
   2931     '---'---'---'---'''
   2932 </pre>
   2933 </ul>
   2934 <i>(In the above figures '<tt>P</tt>' stands for the <tt>Use**</tt> that
   2935     is stored in each <tt>Use</tt> object in the member <tt>Use::Prev</tt>)</i>
   2936 
   2937 </div>
   2938 
   2939 <!-- ______________________________________________________________________ -->
   2940 <h4>
   2941   <a name="Waymarking">The waymarking algorithm</a>
   2942 </h4>
   2943 
   2944 <div>
   2945 <p>
   2946 Since the <tt>Use</tt> objects are deprived of the direct (back)pointer to
   2947 their <tt>User</tt> objects, there must be a fast and exact method to
   2948 recover it. This is accomplished by the following scheme:</p>
   2949 
   2950 A bit-encoding in the 2 LSBits (least significant bits) of the <tt>Use::Prev</tt> allows finding the
   2951 start of the <tt>User</tt> object:
   2952 <ul>
   2953 <li><tt>00</tt> &mdash;&gt; binary digit 0</li>
   2954 <li><tt>01</tt> &mdash;&gt; binary digit 1</li>
   2955 <li><tt>10</tt> &mdash;&gt; stop and calculate (<tt>s</tt>)</li>
   2956 <li><tt>11</tt> &mdash;&gt; full stop (<tt>S</tt>)</li>
   2957 </ul>
   2958 <p>
   2959 Given a <tt>Use*</tt>, all we have to do is to walk till we get
   2960 a stop and we either have a <tt>User</tt> immediately behind or
   2961 we have to walk to the next stop picking up digits
   2962 and calculating the offset:</p>
   2963 <pre>
   2964 .---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.----------------
   2965 | 1 | s | 1 | 0 | 1 | 0 | s | 1 | 1 | 0 | s | 1 | 1 | s | 1 | S | User (or User*)
   2966 '---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'----------------
   2967     |+15                |+10            |+6         |+3     |+1
   2968     |                   |               |           |       |__>
   2969     |                   |               |           |__________>
   2970     |                   |               |______________________>
   2971     |                   |______________________________________>
   2972     |__________________________________________________________>
   2973 </pre>
   2974 <p>
   2975 Only the significant number of bits need to be stored between the
   2976 stops, so that the <i>worst case is 20 memory accesses</i> when there are
   2977 1000 <tt>Use</tt> objects associated with a <tt>User</tt>.</p>
   2978 
   2979 </div>
   2980 
   2981 <!-- ______________________________________________________________________ -->
   2982 <h4>
   2983   <a name="ReferenceImpl">Reference implementation</a>
   2984 </h4>
   2985 
   2986 <div>
   2987 <p>
   2988 The following literate Haskell fragment demonstrates the concept:</p>
   2989 
   2990 <div class="doc_code">
   2991 <pre>
   2992 > import Test.QuickCheck
   2993 > 
   2994 > digits :: Int -> [Char] -> [Char]
   2995 > digits 0 acc = '0' : acc
   2996 > digits 1 acc = '1' : acc
   2997 > digits n acc = digits (n `div` 2) $ digits (n `mod` 2) acc
   2998 > 
   2999 > dist :: Int -> [Char] -> [Char]
   3000 > dist 0 [] = ['S']
   3001 > dist 0 acc = acc
   3002 > dist 1 acc = let r = dist 0 acc in 's' : digits (length r) r
   3003 > dist n acc = dist (n - 1) $ dist 1 acc
   3004 > 
   3005 > takeLast n ss = reverse $ take n $ reverse ss
   3006 > 
   3007 > test = takeLast 40 $ dist 20 []
   3008 > 
   3009 </pre>
   3010 </div>
   3011 <p>
   3012 Printing &lt;test&gt; gives: <tt>"1s100000s11010s10100s1111s1010s110s11s1S"</tt></p>
   3013 <p>
   3014 The reverse algorithm computes the length of the string just by examining
   3015 a certain prefix:</p>
   3016 
   3017 <div class="doc_code">
   3018 <pre>
   3019 > pref :: [Char] -> Int
   3020 > pref "S" = 1
   3021 > pref ('s':'1':rest) = decode 2 1 rest
   3022 > pref (_:rest) = 1 + pref rest
   3023 > 
   3024 > decode walk acc ('0':rest) = decode (walk + 1) (acc * 2) rest
   3025 > decode walk acc ('1':rest) = decode (walk + 1) (acc * 2 + 1) rest
   3026 > decode walk acc _ = walk + acc
   3027 > 
   3028 </pre>
   3029 </div>
   3030 <p>
   3031 Now, as expected, printing &lt;pref test&gt; gives <tt>40</tt>.</p>
   3032 <p>
   3033 We can <i>quickCheck</i> this with the following property:</p>
   3034 
   3035 <div class="doc_code">
   3036 <pre>
   3037 > testcase = dist 2000 []
   3038 > testcaseLength = length testcase
   3039 > 
   3040 > identityProp n = n > 0 &amp;&amp; n &lt;= testcaseLength ==> length arr == pref arr
   3041 >     where arr = takeLast n testcase
   3042 > 
   3043 </pre>
   3044 </div>
   3045 <p>
   3046 As expected, &lt;quickCheck identityProp&gt; gives:</p>
   3047 
   3048 <pre>
   3049 *Main> quickCheck identityProp
   3050 OK, passed 100 tests.
   3051 </pre>
   3052 <p>
   3053 Let's be a bit more exhaustive:</p>
   3054 
   3055 <div class="doc_code">
   3056 <pre>
   3057 > 
   3058 > deepCheck p = check (defaultConfig { configMaxTest = 500 }) p
   3059 > 
   3060 </pre>
   3061 </div>
   3062 <p>
   3063 And here is the result of &lt;deepCheck identityProp&gt;:</p>
   3064 
   3065 <pre>
   3066 *Main> deepCheck identityProp
   3067 OK, passed 500 tests.
   3068 </pre>
   3069 
   3070 </div>
   3071 
   3072 <!-- ______________________________________________________________________ -->
   3073 <h4>
   3074   <a name="Tagging">Tagging considerations</a>
   3075 </h4>
   3076 
   3077 <div>
   3078 
   3079 <p>
   3080 To maintain the invariant that the 2 LSBits of each <tt>Use**</tt> in <tt>Use</tt>
   3081 never change after being set up, setters of <tt>Use::Prev</tt> must re-tag the
   3082 new <tt>Use**</tt> on every modification. Accordingly getters must strip the
   3083 tag bits.</p>
   3084 <p>
   3085 For layout b) instead of the <tt>User</tt> we find a pointer (<tt>User*</tt> with LSBit set).
   3086 Following this pointer brings us to the <tt>User</tt>. A portable trick ensures
   3087 that the first bytes of <tt>User</tt> (if interpreted as a pointer) never have
   3088 the LSBit set. (Portability relies on the fact that all known compilers place the
   3089 <tt>vptr</tt> in the first word of the instances.)</p>
   3090 
   3091 </div>
   3092 
   3093 </div>
   3094 
   3095 </div>
   3096 
   3097 <!-- *********************************************************************** -->
   3098 <h2>
   3099   <a name="coreclasses">The Core LLVM Class Hierarchy Reference </a>
   3100 </h2>
   3101 <!-- *********************************************************************** -->
   3102 
   3103 <div>
   3104 <p><tt>#include "<a href="/doxygen/Type_8h-source.html">llvm/Type.h</a>"</tt>
   3105 <br>doxygen info: <a href="/doxygen/classllvm_1_1Type.html">Type Class</a></p>
   3106 
   3107 <p>The Core LLVM classes are the primary means of representing the program
   3108 being inspected or transformed.  The core LLVM classes are defined in
   3109 header files in the <tt>include/llvm/</tt> directory, and implemented in
   3110 the <tt>lib/VMCore</tt> directory.</p>
   3111 
   3112 <!-- ======================================================================= -->
   3113 <h3>
   3114   <a name="Type">The <tt>Type</tt> class and Derived Types</a>
   3115 </h3>
   3116 
   3117 <div>
   3118 
   3119   <p><tt>Type</tt> is a superclass of all type classes. Every <tt>Value</tt> has
   3120   a <tt>Type</tt>. <tt>Type</tt> cannot be instantiated directly but only
   3121   through its subclasses. Certain primitive types (<tt>VoidType</tt>,
   3122   <tt>LabelType</tt>, <tt>FloatType</tt> and <tt>DoubleType</tt>) have hidden 
   3123   subclasses. They are hidden because they offer no useful functionality beyond
   3124   what the <tt>Type</tt> class offers except to distinguish themselves from 
   3125   other subclasses of <tt>Type</tt>.</p>
   3126   <p>All other types are subclasses of <tt>DerivedType</tt>.  Types can be 
   3127   named, but this is not a requirement. There exists exactly 
   3128   one instance of a given shape at any one time.  This allows type equality to
   3129   be performed with address equality of the Type Instance. That is, given two 
   3130   <tt>Type*</tt> values, the types are identical if the pointers are identical.
   3131   </p>
   3132 
   3133 <!-- _______________________________________________________________________ -->
   3134 <h4>
   3135   <a name="m_Type">Important Public Methods</a>
   3136 </h4>
   3137 
   3138 <div>
   3139 
   3140 <ul>
   3141   <li><tt>bool isIntegerTy() const</tt>: Returns true for any integer type.</li>
   3142 
   3143   <li><tt>bool isFloatingPointTy()</tt>: Return true if this is one of the five
   3144   floating point types.</li>
   3145 
   3146   <li><tt>bool isSized()</tt>: Return true if the type has known size. Things
   3147   that don't have a size are abstract types, labels and void.</li>
   3148 
   3149 </ul>
   3150 </div>
   3151 
   3152 <!-- _______________________________________________________________________ -->
   3153 <h4>
   3154   <a name="derivedtypes">Important Derived Types</a>
   3155 </h4>
   3156 <div>
   3157 <dl>
   3158   <dt><tt>IntegerType</tt></dt>
   3159   <dd>Subclass of DerivedType that represents integer types of any bit width. 
   3160   Any bit width between <tt>IntegerType::MIN_INT_BITS</tt> (1) and 
   3161   <tt>IntegerType::MAX_INT_BITS</tt> (~8 million) can be represented.
   3162   <ul>
   3163     <li><tt>static const IntegerType* get(unsigned NumBits)</tt>: get an integer
   3164     type of a specific bit width.</li>
   3165     <li><tt>unsigned getBitWidth() const</tt>: Get the bit width of an integer
   3166     type.</li>
   3167   </ul>
   3168   </dd>
   3169   <dt><tt>SequentialType</tt></dt>
   3170   <dd>This is subclassed by ArrayType, PointerType and VectorType.
   3171     <ul>
   3172       <li><tt>const Type * getElementType() const</tt>: Returns the type of each
   3173       of the elements in the sequential type. </li>
   3174     </ul>
   3175   </dd>
   3176   <dt><tt>ArrayType</tt></dt>
   3177   <dd>This is a subclass of SequentialType and defines the interface for array 
   3178   types.
   3179     <ul>
   3180       <li><tt>unsigned getNumElements() const</tt>: Returns the number of 
   3181       elements in the array. </li>
   3182     </ul>
   3183   </dd>
   3184   <dt><tt>PointerType</tt></dt>
   3185   <dd>Subclass of SequentialType for pointer types.</dd>
   3186   <dt><tt>VectorType</tt></dt>
   3187   <dd>Subclass of SequentialType for vector types. A 
   3188   vector type is similar to an ArrayType but is distinguished because it is 
   3189   a first class type whereas ArrayType is not. Vector types are used for 
   3190   vector operations and are usually small vectors of an integer or floating
   3191   point type.</dd>
   3192   <dt><tt>StructType</tt></dt>
   3193   <dd>Subclass of DerivedType for struct types.</dd>
   3194   <dt><tt><a name="FunctionType">FunctionType</a></tt></dt>
   3195   <dd>Subclass of DerivedType for function types.
   3196     <ul>
   3197       <li><tt>bool isVarArg() const</tt>: Returns true if it's a vararg
   3198       function</li>
   3199       <li><tt> const Type * getReturnType() const</tt>: Returns the
   3200       return type of the function.</li>
   3201       <li><tt>const Type * getParamType (unsigned i)</tt>: Returns
   3202       the type of the ith parameter.</li>
   3203       <li><tt> const unsigned getNumParams() const</tt>: Returns the
   3204       number of formal parameters.</li>
   3205     </ul>
   3206   </dd>
   3207 </dl>
   3208 </div>
   3209 
   3210 </div>
   3211 
   3212 <!-- ======================================================================= -->
   3213 <h3>
   3214   <a name="Module">The <tt>Module</tt> class</a>
   3215 </h3>
   3216 
   3217 <div>
   3218 
   3219 <p><tt>#include "<a
   3220 href="/doxygen/Module_8h-source.html">llvm/Module.h</a>"</tt><br> doxygen info:
   3221 <a href="/doxygen/classllvm_1_1Module.html">Module Class</a></p>
   3222 
   3223 <p>The <tt>Module</tt> class represents the top level structure present in LLVM
   3224 programs.  An LLVM module is effectively either a translation unit of the
   3225 original program or a combination of several translation units merged by the
   3226 linker.  The <tt>Module</tt> class keeps track of a list of <a
   3227 href="#Function"><tt>Function</tt></a>s, a list of <a
   3228 href="#GlobalVariable"><tt>GlobalVariable</tt></a>s, and a <a
   3229 href="#SymbolTable"><tt>SymbolTable</tt></a>.  Additionally, it contains a few
   3230 helpful member functions that try to make common operations easy.</p>
   3231 
   3232 <!-- _______________________________________________________________________ -->
   3233 <h4>
   3234   <a name="m_Module">Important Public Members of the <tt>Module</tt> class</a>
   3235 </h4>
   3236 
   3237 <div>
   3238 
   3239 <ul>
   3240   <li><tt>Module::Module(std::string name = "")</tt></li>
   3241 </ul>
   3242 
   3243 <p>Constructing a <a href="#Module">Module</a> is easy. You can optionally
   3244 provide a name for it (probably based on the name of the translation unit).</p>
   3245 
   3246 <ul>
   3247   <li><tt>Module::iterator</tt> - Typedef for function list iterator<br>
   3248     <tt>Module::const_iterator</tt> - Typedef for const_iterator.<br>
   3249 
   3250     <tt>begin()</tt>, <tt>end()</tt>
   3251     <tt>size()</tt>, <tt>empty()</tt>
   3252 
   3253     <p>These are forwarding methods that make it easy to access the contents of
   3254     a <tt>Module</tt> object's <a href="#Function"><tt>Function</tt></a>
   3255     list.</p></li>
   3256 
   3257   <li><tt>Module::FunctionListType &amp;getFunctionList()</tt>
   3258 
   3259     <p> Returns the list of <a href="#Function"><tt>Function</tt></a>s.  This is
   3260     necessary to use when you need to update the list or perform a complex
   3261     action that doesn't have a forwarding method.</p>
   3262 
   3263     <p><!--  Global Variable --></p></li> 
   3264 </ul>
   3265 
   3266 <hr>
   3267 
   3268 <ul>
   3269   <li><tt>Module::global_iterator</tt> - Typedef for global variable list iterator<br>
   3270 
   3271     <tt>Module::const_global_iterator</tt> - Typedef for const_iterator.<br>
   3272 
   3273     <tt>global_begin()</tt>, <tt>global_end()</tt>
   3274     <tt>global_size()</tt>, <tt>global_empty()</tt>
   3275 
   3276     <p> These are forwarding methods that make it easy to access the contents of
   3277     a <tt>Module</tt> object's <a
   3278     href="#GlobalVariable"><tt>GlobalVariable</tt></a> list.</p></li>
   3279 
   3280   <li><tt>Module::GlobalListType &amp;getGlobalList()</tt>
   3281 
   3282     <p>Returns the list of <a
   3283     href="#GlobalVariable"><tt>GlobalVariable</tt></a>s.  This is necessary to
   3284     use when you need to update the list or perform a complex action that
   3285     doesn't have a forwarding method.</p>
   3286 
   3287     <p><!--  Symbol table stuff --> </p></li>
   3288 </ul>
   3289 
   3290 <hr>
   3291 
   3292 <ul>
   3293   <li><tt><a href="#SymbolTable">SymbolTable</a> *getSymbolTable()</tt>
   3294 
   3295     <p>Return a reference to the <a href="#SymbolTable"><tt>SymbolTable</tt></a>
   3296     for this <tt>Module</tt>.</p>
   3297 
   3298     <p><!--  Convenience methods --></p></li>
   3299 </ul>
   3300 
   3301 <hr>
   3302 
   3303 <ul>
   3304   <li><tt><a href="#Function">Function</a> *getFunction(const std::string
   3305   &amp;Name, const <a href="#FunctionType">FunctionType</a> *Ty)</tt>
   3306 
   3307     <p>Look up the specified function in the <tt>Module</tt> <a
   3308     href="#SymbolTable"><tt>SymbolTable</tt></a>. If it does not exist, return
   3309     <tt>null</tt>.</p></li>
   3310 
   3311   <li><tt><a href="#Function">Function</a> *getOrInsertFunction(const
   3312   std::string &amp;Name, const <a href="#FunctionType">FunctionType</a> *T)</tt>
   3313 
   3314     <p>Look up the specified function in the <tt>Module</tt> <a
   3315     href="#SymbolTable"><tt>SymbolTable</tt></a>. If it does not exist, add an
   3316     external declaration for the function and return it.</p></li>
   3317 
   3318   <li><tt>std::string getTypeName(const <a href="#Type">Type</a> *Ty)</tt>
   3319 
   3320     <p>If there is at least one entry in the <a
   3321     href="#SymbolTable"><tt>SymbolTable</tt></a> for the specified <a
   3322     href="#Type"><tt>Type</tt></a>, return it.  Otherwise return the empty
   3323     string.</p></li>
   3324 
   3325   <li><tt>bool addTypeName(const std::string &amp;Name, const <a
   3326   href="#Type">Type</a> *Ty)</tt>
   3327 
   3328     <p>Insert an entry in the <a href="#SymbolTable"><tt>SymbolTable</tt></a>
   3329     mapping <tt>Name</tt> to <tt>Ty</tt>. If there is already an entry for this
   3330     name, true is returned and the <a
   3331     href="#SymbolTable"><tt>SymbolTable</tt></a> is not modified.</p></li>
   3332 </ul>
   3333 
   3334 </div>
   3335 
   3336 </div>
   3337 
   3338 <!-- ======================================================================= -->
   3339 <h3>
   3340   <a name="Value">The <tt>Value</tt> class</a>
   3341 </h3>
   3342 
   3343 <div>
   3344 
   3345 <p><tt>#include "<a href="/doxygen/Value_8h-source.html">llvm/Value.h</a>"</tt>
   3346 <br> 
   3347 doxygen info: <a href="/doxygen/classllvm_1_1Value.html">Value Class</a></p>
   3348 
   3349 <p>The <tt>Value</tt> class is the most important class in the LLVM Source
   3350 base.  It represents a typed value that may be used (among other things) as an
   3351 operand to an instruction.  There are many different types of <tt>Value</tt>s,
   3352 such as <a href="#Constant"><tt>Constant</tt></a>s, <a
   3353 href="#Argument"><tt>Argument</tt></a>s. Even <a
   3354 href="#Instruction"><tt>Instruction</tt></a>s and <a
   3355 href="#Function"><tt>Function</tt></a>s are <tt>Value</tt>s.</p>
   3356 
   3357 <p>A particular <tt>Value</tt> may be used many times in the LLVM representation
   3358 for a program.  For example, an incoming argument to a function (represented
   3359 with an instance of the <a href="#Argument">Argument</a> class) is "used" by
   3360 every instruction in the function that references the argument.  To keep track
   3361 of this relationship, the <tt>Value</tt> class keeps a list of all of the <a
   3362 href="#User"><tt>User</tt></a>s that are using it (the <a
   3363 href="#User"><tt>User</tt></a> class is a base class for all nodes in the LLVM
   3364 graph that can refer to <tt>Value</tt>s).  This use list is how LLVM represents
   3365 def-use information in the program, and is accessible through the <tt>use_</tt>*
   3366 methods, shown below.</p>
   3367 
   3368 <p>Because LLVM is a typed representation, every LLVM <tt>Value</tt> is typed,
   3369 and this <a href="#Type">Type</a> is available through the <tt>getType()</tt>
   3370 method. In addition, all LLVM values can be named.  The "name" of the
   3371 <tt>Value</tt> is a symbolic string printed in the LLVM code:</p>
   3372 
   3373 <div class="doc_code">
   3374 <pre>
   3375 %<b>foo</b> = add i32 1, 2
   3376 </pre>
   3377 </div>
   3378 
   3379 <p><a name="nameWarning">The name of this instruction is "foo".</a> <b>NOTE</b>
   3380 that the name of any value may be missing (an empty string), so names should
   3381 <b>ONLY</b> be used for debugging (making the source code easier to read,
   3382 debugging printouts), they should not be used to keep track of values or map
   3383 between them.  For this purpose, use a <tt>std::map</tt> of pointers to the
   3384 <tt>Value</tt> itself instead.</p>
   3385 
   3386 <p>One important aspect of LLVM is that there is no distinction between an SSA
   3387 variable and the operation that produces it.  Because of this, any reference to
   3388 the value produced by an instruction (or the value available as an incoming
   3389 argument, for example) is represented as a direct pointer to the instance of
   3390 the class that
   3391 represents this value.  Although this may take some getting used to, it
   3392 simplifies the representation and makes it easier to manipulate.</p>
   3393 
   3394 <!-- _______________________________________________________________________ -->
   3395 <h4>
   3396   <a name="m_Value">Important Public Members of the <tt>Value</tt> class</a>
   3397 </h4>
   3398 
   3399 <div>
   3400 
   3401 <ul>
   3402   <li><tt>Value::use_iterator</tt> - Typedef for iterator over the
   3403 use-list<br>
   3404     <tt>Value::const_use_iterator</tt> - Typedef for const_iterator over
   3405 the use-list<br>
   3406     <tt>unsigned use_size()</tt> - Returns the number of users of the
   3407 value.<br>
   3408     <tt>bool use_empty()</tt> - Returns true if there are no users.<br>
   3409     <tt>use_iterator use_begin()</tt> - Get an iterator to the start of
   3410 the use-list.<br>
   3411     <tt>use_iterator use_end()</tt> - Get an iterator to the end of the
   3412 use-list.<br>
   3413     <tt><a href="#User">User</a> *use_back()</tt> - Returns the last
   3414 element in the list.
   3415     <p> These methods are the interface to access the def-use
   3416 information in LLVM.  As with all other iterators in LLVM, the naming
   3417 conventions follow the conventions defined by the <a href="#stl">STL</a>.</p>
   3418   </li>
   3419   <li><tt><a href="#Type">Type</a> *getType() const</tt>
   3420     <p>This method returns the Type of the Value.</p>
   3421   </li>
   3422   <li><tt>bool hasName() const</tt><br>
   3423     <tt>std::string getName() const</tt><br>
   3424     <tt>void setName(const std::string &amp;Name)</tt>
   3425     <p> This family of methods is used to access and assign a name to a <tt>Value</tt>;
   3426 be aware of the <a href="#nameWarning">precaution above</a>.</p>
   3427   </li>
   3428   <li><tt>void replaceAllUsesWith(Value *V)</tt>
   3429 
   3430     <p>This method traverses the use list of a <tt>Value</tt> changing all <a
   3431     href="#User"><tt>User</tt>s</a> of the current value to refer to
   3432     "<tt>V</tt>" instead.  For example, if you detect that an instruction always
   3433     produces a constant value (for example through constant folding), you can
   3434     replace all uses of the instruction with the constant like this:</p>
   3435 
   3436 <div class="doc_code">
   3437 <pre>
   3438 Inst-&gt;replaceAllUsesWith(ConstVal);
   3439 </pre>
   3440 </div>
   3441 
   3442 </ul>
   3443 
   3444 </div>
   3445 
   3446 </div>
   3447 
   3448 <!-- ======================================================================= -->
   3449 <h3>
   3450   <a name="User">The <tt>User</tt> class</a>
   3451 </h3>
   3452 
   3453 <div>
   3454   
   3455 <p>
   3456 <tt>#include "<a href="/doxygen/User_8h-source.html">llvm/User.h</a>"</tt><br>
   3457 doxygen info: <a href="/doxygen/classllvm_1_1User.html">User Class</a><br>
   3458 Superclass: <a href="#Value"><tt>Value</tt></a></p>
   3459 
   3460 <p>The <tt>User</tt> class is the common base class of all LLVM nodes that may
   3461 refer to <a href="#Value"><tt>Value</tt></a>s.  It exposes a list of "Operands"
   3462 that are all of the <a href="#Value"><tt>Value</tt></a>s that the User is
   3463 referring to.  The <tt>User</tt> class itself is a subclass of
   3464 <tt>Value</tt>.</p>
   3465 
   3466 <p>The operands of a <tt>User</tt> point directly to the LLVM <a
   3467 href="#Value"><tt>Value</tt></a> that it refers to.  Because LLVM uses Static
   3468 Single Assignment (SSA) form, there can only be one definition referred to,
   3469 allowing this direct connection.  This connection provides the use-def
   3470 information in LLVM.</p>
   3471 
   3472 <!-- _______________________________________________________________________ -->
   3473 <h4>
   3474   <a name="m_User">Important Public Members of the <tt>User</tt> class</a>
   3475 </h4>
   3476 
   3477 <div>
   3478 
   3479 <p>The <tt>User</tt> class exposes the operand list in two ways: through
   3480 an index access interface and through an iterator based interface.</p>
   3481 
   3482 <ul>
   3483   <li><tt>Value *getOperand(unsigned i)</tt><br>
   3484     <tt>unsigned getNumOperands()</tt>
   3485     <p> These two methods expose the operands of the <tt>User</tt> in a
   3486 convenient form for direct access.</p></li>
   3487 
   3488   <li><tt>User::op_iterator</tt> - Typedef for iterator over the operand
   3489 list<br>
   3490     <tt>op_iterator op_begin()</tt> - Get an iterator to the start of 
   3491 the operand list.<br>
   3492     <tt>op_iterator op_end()</tt> - Get an iterator to the end of the
   3493 operand list.
   3494     <p> Together, these methods make up the iterator based interface to
   3495 the operands of a <tt>User</tt>.</p></li>
   3496 </ul>
   3497 
   3498 </div>    
   3499 
   3500 </div>
   3501 
   3502 <!-- ======================================================================= -->
   3503 <h3>
   3504   <a name="Instruction">The <tt>Instruction</tt> class</a>
   3505 </h3>
   3506 
   3507 <div>
   3508 
   3509 <p><tt>#include "</tt><tt><a
   3510 href="/doxygen/Instruction_8h-source.html">llvm/Instruction.h</a>"</tt><br>
   3511 doxygen info: <a href="/doxygen/classllvm_1_1Instruction.html">Instruction Class</a><br>
   3512 Superclasses: <a href="#User"><tt>User</tt></a>, <a
   3513 href="#Value"><tt>Value</tt></a></p>
   3514 
   3515 <p>The <tt>Instruction</tt> class is the common base class for all LLVM
   3516 instructions.  It provides only a few methods, but is a very commonly used
   3517 class.  The primary data tracked by the <tt>Instruction</tt> class itself is the
   3518 opcode (instruction type) and the parent <a
   3519 href="#BasicBlock"><tt>BasicBlock</tt></a> the <tt>Instruction</tt> is embedded
   3520 into.  To represent a specific type of instruction, one of many subclasses of
   3521 <tt>Instruction</tt> is used.</p>
   3522 
   3523 <p> Because the <tt>Instruction</tt> class subclasses the <a
   3524 href="#User"><tt>User</tt></a> class, its operands can be accessed in the same
   3525 way as for other <a href="#User"><tt>User</tt></a>s (with the
   3526 <tt>getOperand()</tt>/<tt>getNumOperands()</tt> and
   3527 <tt>op_begin()</tt>/<tt>op_end()</tt> methods).</p> <p> An important file for
   3528 the <tt>Instruction</tt> class is the <tt>llvm/Instruction.def</tt> file. This
   3529 file contains some meta-data about the various different types of instructions
   3530 in LLVM.  It describes the enum values that are used as opcodes (for example
   3531 <tt>Instruction::Add</tt> and <tt>Instruction::ICmp</tt>), as well as the
   3532 concrete sub-classes of <tt>Instruction</tt> that implement the instruction (for
   3533 example <tt><a href="#BinaryOperator">BinaryOperator</a></tt> and <tt><a
   3534 href="#CmpInst">CmpInst</a></tt>).  Unfortunately, the use of macros in
   3535 this file confuses doxygen, so these enum values don't show up correctly in the
   3536 <a href="/doxygen/classllvm_1_1Instruction.html">doxygen output</a>.</p>
   3537 
   3538 <!-- _______________________________________________________________________ -->
   3539 <h4>
   3540   <a name="s_Instruction">
   3541     Important Subclasses of the <tt>Instruction</tt> class
   3542   </a>
   3543 </h4>
   3544 <div>
   3545   <ul>
   3546     <li><tt><a name="BinaryOperator">BinaryOperator</a></tt>
   3547     <p>This subclass represents all two operand instructions whose operands
   3548     must be the same type, except for the comparison instructions.</p></li>
   3549     <li><tt><a name="CastInst">CastInst</a></tt>
   3550     <p>This subclass is the parent of the 12 casting instructions. It provides
   3551     common operations on cast instructions.</p>
   3552     <li><tt><a name="CmpInst">CmpInst</a></tt>
   3553     <p>This subclass represents the two comparison instructions, 
   3554     <a href="LangRef.html#i_icmp">ICmpInst</a> (integer operands), and
   3555     <a href="LangRef.html#i_fcmp">FCmpInst</a> (floating point operands).</p>
   3556     <li><tt><a name="TerminatorInst">TerminatorInst</a></tt>
   3557     <p>This subclass is the parent of all terminator instructions (those which
   3558     can terminate a block).</p>
   3559   </ul>
   3560   </div>
   3561 
   3562 <!-- _______________________________________________________________________ -->
   3563 <h4>
   3564   <a name="m_Instruction">
   3565     Important Public Members of the <tt>Instruction</tt> class
   3566   </a>
   3567 </h4>
   3568 
   3569 <div>
   3570 
   3571 <ul>
   3572   <li><tt><a href="#BasicBlock">BasicBlock</a> *getParent()</tt>
   3573     <p>Returns the <a href="#BasicBlock"><tt>BasicBlock</tt></a> that
   3574 this  <tt>Instruction</tt> is embedded into.</p></li>
   3575   <li><tt>bool mayWriteToMemory()</tt>
   3576     <p>Returns true if the instruction writes to memory, i.e. it is a
   3577       <tt>call</tt>, <tt>free</tt>, <tt>invoke</tt>, or <tt>store</tt>.</p></li>
   3578   <li><tt>unsigned getOpcode()</tt>
   3579     <p>Returns the opcode for the <tt>Instruction</tt>.</p></li>
   3580   <li><tt><a href="#Instruction">Instruction</a> *clone() const</tt>
   3581     <p>Returns another instance of the specified instruction, identical
   3582 in all ways to the original except that the instruction has no parent
   3583 (i.e. it's not embedded into a <a href="#BasicBlock"><tt>BasicBlock</tt></a>),
   3584 and it has no name.</p></li>
   3585 </ul>
   3586 
   3587 </div>
   3588 
   3589 </div>
   3590 
   3591 <!-- ======================================================================= -->
   3592 <h3>
   3593   <a name="Constant">The <tt>Constant</tt> class and subclasses</a>
   3594 </h3>
   3595 
   3596 <div>
   3597 
   3598 <p>Constant represents a base class for different types of constants. It
   3599 is subclassed by ConstantInt, ConstantArray, etc. for representing 
   3600 the various types of Constants.  <a href="#GlobalValue">GlobalValue</a> is also
   3601 a subclass, which represents the address of a global variable or function.
   3602 </p>
   3603 
   3604 <!-- _______________________________________________________________________ -->
   3605 <h4>Important Subclasses of Constant</h4>
   3606 <div>
   3607 <ul>
   3608   <li>ConstantInt : This subclass of Constant represents an integer constant of
   3609   any width.
   3610     <ul>
   3611       <li><tt>const APInt&amp; getValue() const</tt>: Returns the underlying
   3612       value of this constant, an APInt value.</li>
   3613       <li><tt>int64_t getSExtValue() const</tt>: Converts the underlying APInt
   3614       value to an int64_t via sign extension. If the value (not the bit width)
   3615       of the APInt is too large to fit in an int64_t, an assertion will result.
   3616       For this reason, use of this method is discouraged.</li>
   3617       <li><tt>uint64_t getZExtValue() const</tt>: Converts the underlying APInt
   3618       value to a uint64_t via zero extension. If the value (not the bit width)
   3619       of the APInt is too large to fit in a uint64_t, an assertion will result.
   3620       For this reason, use of this method is discouraged.</li>
   3621       <li><tt>static ConstantInt* get(const APInt&amp; Val)</tt>: Returns the
   3622       ConstantInt object that represents the value provided by <tt>Val</tt>.
   3623       The type is implied as the IntegerType that corresponds to the bit width
   3624       of <tt>Val</tt>.</li>
   3625       <li><tt>static ConstantInt* get(const Type *Ty, uint64_t Val)</tt>: 
   3626       Returns the ConstantInt object that represents the value provided by 
   3627       <tt>Val</tt> for integer type <tt>Ty</tt>.</li>
   3628     </ul>
   3629   </li>
   3630   <li>ConstantFP : This class represents a floating point constant.
   3631     <ul>
   3632       <li><tt>double getValue() const</tt>: Returns the underlying value of 
   3633       this constant. </li>
   3634     </ul>
   3635   </li>
   3636   <li>ConstantArray : This represents a constant array.
   3637     <ul>
   3638       <li><tt>const std::vector&lt;Use&gt; &amp;getValues() const</tt>: Returns 
   3639       a vector of component constants that make up this array. </li>
   3640     </ul>
   3641   </li>
   3642   <li>ConstantStruct : This represents a constant struct.
   3643     <ul>
   3644       <li><tt>const std::vector&lt;Use&gt; &amp;getValues() const</tt>: Returns 
   3645       a vector of component constants that make up this struct. </li>
   3646     </ul>
   3647   </li>
   3648   <li>GlobalValue : This represents either a global variable or a function. In 
   3649   either case, the value is a constant fixed address (after linking). 
   3650   </li>
   3651 </ul>
   3652 </div>
   3653 
   3654 </div>
   3655 
   3656 <!-- ======================================================================= -->
   3657 <h3>
   3658   <a name="GlobalValue">The <tt>GlobalValue</tt> class</a>
   3659 </h3>
   3660 
   3661 <div>
   3662 
   3663 <p><tt>#include "<a
   3664 href="/doxygen/GlobalValue_8h-source.html">llvm/GlobalValue.h</a>"</tt><br>
   3665 doxygen info: <a href="/doxygen/classllvm_1_1GlobalValue.html">GlobalValue
   3666 Class</a><br>
   3667 Superclasses: <a href="#Constant"><tt>Constant</tt></a>, 
   3668 <a href="#User"><tt>User</tt></a>, <a href="#Value"><tt>Value</tt></a></p>
   3669 
   3670 <p>Global values (<a href="#GlobalVariable"><tt>GlobalVariable</tt></a>s or <a
   3671 href="#Function"><tt>Function</tt></a>s) are the only LLVM values that are
   3672 visible in the bodies of all <a href="#Function"><tt>Function</tt></a>s.
   3673 Because they are visible at global scope, they are also subject to linking with
   3674 other globals defined in different translation units.  To control the linking
   3675 process, <tt>GlobalValue</tt>s know their linkage rules. Specifically,
   3676 <tt>GlobalValue</tt>s know whether they have internal or external linkage, as
   3677 defined by the <tt>LinkageTypes</tt> enumeration.</p>
   3678 
   3679 <p>If a <tt>GlobalValue</tt> has internal linkage (equivalent to being
   3680 <tt>static</tt> in C), it is not visible to code outside the current translation
   3681 unit, and does not participate in linking.  If it has external linkage, it is
   3682 visible to external code, and does participate in linking.  In addition to
   3683 linkage information, <tt>GlobalValue</tt>s keep track of which <a
   3684 href="#Module"><tt>Module</tt></a> they are currently part of.</p>
   3685 
   3686 <p>Because <tt>GlobalValue</tt>s are memory objects, they are always referred to
   3687 by their <b>address</b>. As such, the <a href="#Type"><tt>Type</tt></a> of a
   3688 global is always a pointer to its contents. It is important to remember this
   3689 when using the <tt>GetElementPtrInst</tt> instruction because this pointer must
   3690 be dereferenced first. For example, if you have a <tt>GlobalVariable</tt> (a
   3691 subclass of <tt>GlobalValue</tt>) that is an array of 24 ints, type <tt>[24 x
   3692 i32]</tt>, then the <tt>GlobalVariable</tt> is a pointer to that array. Although
   3693 the address of the first element of this array and the value of the
   3694 <tt>GlobalVariable</tt> are the same, they have different types. The
   3695 <tt>GlobalVariable</tt>'s type is <tt>[24 x i32]*</tt>. The type of the first
   3696 element's address is <tt>i32*</tt>. Because of this, accessing a global value requires you to
   3697 dereference the pointer with <tt>GetElementPtrInst</tt> first, then its elements
   3698 can be accessed. This is explained in the <a href="LangRef.html#globalvars">LLVM
   3699 Language Reference Manual</a>.</p>
   3700 
   3701 <!-- _______________________________________________________________________ -->
   3702 <h4>
   3703   <a name="m_GlobalValue">
   3704     Important Public Members of the <tt>GlobalValue</tt> class
   3705   </a>
   3706 </h4>
   3707 
   3708 <div>
   3709 
   3710 <ul>
   3711   <li><tt>bool hasInternalLinkage() const</tt><br>
   3712     <tt>bool hasExternalLinkage() const</tt><br>
   3713     <tt>void setInternalLinkage(bool HasInternalLinkage)</tt>
   3714     <p> These methods manipulate the linkage characteristics of the <tt>GlobalValue</tt>.</p>
   3715     <p> </p>
   3716   </li>
   3717   <li><tt><a href="#Module">Module</a> *getParent()</tt>
   3718     <p> This returns the <a href="#Module"><tt>Module</tt></a> that the
   3719 GlobalValue is currently embedded into.</p></li>
   3720 </ul>
   3721 
   3722 </div>
   3723 
   3724 </div>
   3725 
   3726 <!-- ======================================================================= -->
   3727 <h3>
   3728   <a name="Function">The <tt>Function</tt> class</a>
   3729 </h3>
   3730 
   3731 <div>
   3732 
   3733 <p><tt>#include "<a
   3734 href="/doxygen/Function_8h-source.html">llvm/Function.h</a>"</tt><br> doxygen
   3735 info: <a href="/doxygen/classllvm_1_1Function.html">Function Class</a><br>
   3736 Superclasses: <a href="#GlobalValue"><tt>GlobalValue</tt></a>, 
   3737 <a href="#Constant"><tt>Constant</tt></a>, 
   3738 <a href="#User"><tt>User</tt></a>, 
   3739 <a href="#Value"><tt>Value</tt></a></p>
   3740 
   3741 <p>The <tt>Function</tt> class represents a single procedure in LLVM.  It is
   3742 actually one of the more complex classes in the LLVM hierarchy because it must
   3743 keep track of a large amount of data.  The <tt>Function</tt> class keeps track
   3744 of a list of <a href="#BasicBlock"><tt>BasicBlock</tt></a>s, a list of formal 
   3745 <a href="#Argument"><tt>Argument</tt></a>s, and a 
   3746 <a href="#SymbolTable"><tt>SymbolTable</tt></a>.</p>
   3747 
   3748 <p>The list of <a href="#BasicBlock"><tt>BasicBlock</tt></a>s is the most
   3749 commonly used part of <tt>Function</tt> objects.  The list imposes an implicit
   3750 ordering of the blocks in the function, which indicates how the code will be
   3751 laid out by the backend.  Additionally, the first <a
   3752 href="#BasicBlock"><tt>BasicBlock</tt></a> is the implicit entry node for the
   3753 <tt>Function</tt>.  It is not legal in LLVM to explicitly branch to this initial
   3754 block.  There are no implicit exit nodes, and in fact there may be multiple exit
   3755 nodes from a single <tt>Function</tt>.  If the <a
   3756 href="#BasicBlock"><tt>BasicBlock</tt></a> list is empty, this indicates that
   3757 the <tt>Function</tt> is actually a function declaration: the actual body of the
   3758 function hasn't been linked in yet.</p>
   3759 
   3760 <p>In addition to a list of <a href="#BasicBlock"><tt>BasicBlock</tt></a>s, the
   3761 <tt>Function</tt> class also keeps track of the list of formal <a
   3762 href="#Argument"><tt>Argument</tt></a>s that the function receives.  This
   3763 container manages the lifetime of the <a href="#Argument"><tt>Argument</tt></a>
   3764 nodes, just like the <a href="#BasicBlock"><tt>BasicBlock</tt></a> list does for
   3765 the <a href="#BasicBlock"><tt>BasicBlock</tt></a>s.</p>
   3766 
   3767 <p>The <a href="#SymbolTable"><tt>SymbolTable</tt></a> is a very rarely used
   3768 LLVM feature that is only used when you have to look up a value by name.  Aside
   3769 from that, the <a href="#SymbolTable"><tt>SymbolTable</tt></a> is used
   3770 internally to make sure that there are no conflicts between the names of <a
   3771 href="#Instruction"><tt>Instruction</tt></a>s, <a
   3772 href="#BasicBlock"><tt>BasicBlock</tt></a>s, or <a
   3773 href="#Argument"><tt>Argument</tt></a>s in the function body.</p>
   3774 
   3775 <p>Note that <tt>Function</tt> is a <a href="#GlobalValue">GlobalValue</a>
   3776 and therefore also a <a href="#Constant">Constant</a>. The value of the function
   3777 is its address (after linking) which is guaranteed to be constant.</p>
   3778 
   3779 <!-- _______________________________________________________________________ -->
   3780 <h4>
   3781   <a name="m_Function">
   3782     Important Public Members of the <tt>Function</tt> class
   3783   </a>
   3784 </h4>
   3785 
   3786 <div>
   3787 
   3788 <ul>
   3789   <li><tt>Function(const </tt><tt><a href="#FunctionType">FunctionType</a>
   3790   *Ty, LinkageTypes Linkage, const std::string &amp;N = "", Module* Parent = 0)</tt>
   3791 
   3792     <p>Constructor used when you need to create new <tt>Function</tt>s to add
   3793     to the program.  The constructor must specify the type of the function to
   3794     create and what type of linkage the function should have. The <a 
   3795     href="#FunctionType"><tt>FunctionType</tt></a> argument
   3796     specifies the formal arguments and return value for the function. The same
   3797     <a href="#FunctionType"><tt>FunctionType</tt></a> value can be used to
   3798     create multiple functions. The <tt>Parent</tt> argument specifies the Module
   3799     in which the function is defined. If this argument is provided, the function
   3800     will automatically be inserted into that module's list of
   3801     functions.</p></li>
   3802 
   3803   <li><tt>bool isDeclaration()</tt>
   3804 
   3805     <p>Returns true if the <tt>Function</tt> does not have a body defined.  If the
   3806     function is "external", it does not have a body, and thus must be resolved
   3807     by linking with a function defined in a different translation unit.</p></li>
   3808 
   3809   <li><tt>Function::iterator</tt> - Typedef for basic block list iterator<br>
   3810     <tt>Function::const_iterator</tt> - Typedef for const_iterator.<br>
   3811 
   3812     <tt>begin()</tt>, <tt>end()</tt>,
   3813     <tt>size()</tt>, <tt>empty()</tt>
   3814 
   3815     <p>These are forwarding methods that make it easy to access the contents of
   3816     a <tt>Function</tt> object's <a href="#BasicBlock"><tt>BasicBlock</tt></a>
   3817     list.</p></li>
   3818 
   3819   <li><tt>Function::BasicBlockListType &amp;getBasicBlockList()</tt>
   3820 
   3821     <p>Returns the list of <a href="#BasicBlock"><tt>BasicBlock</tt></a>s.  This
   3822     is necessary to use when you need to update the list or perform a complex
   3823     action that doesn't have a forwarding method.</p></li>
   3824 
   3825   <li><tt>Function::arg_iterator</tt> - Typedef for the argument list
   3826 iterator<br>
   3827     <tt>Function::const_arg_iterator</tt> - Typedef for const_iterator.<br>
   3828 
   3829     <tt>arg_begin()</tt>, <tt>arg_end()</tt>,
   3830     <tt>arg_size()</tt>, <tt>arg_empty()</tt>
   3831 
   3832     <p>These are forwarding methods that make it easy to access the contents of
   3833     a <tt>Function</tt> object's <a href="#Argument"><tt>Argument</tt></a>
   3834     list.</p></li>
   3835 
   3836   <li><tt>Function::ArgumentListType &amp;getArgumentList()</tt>
   3837 
   3838     <p>Returns the list of <a href="#Argument"><tt>Argument</tt></a>s.  This is
   3839     necessary to use when you need to update the list or perform a complex
   3840     action that doesn't have a forwarding method.</p></li>
   3841 
   3842   <li><tt><a href="#BasicBlock">BasicBlock</a> &amp;getEntryBlock()</tt>
   3843 
   3844     <p>Returns the entry <a href="#BasicBlock"><tt>BasicBlock</tt></a> for the
   3845     function.  Because the entry block for the function is always the first
   3846     block, this returns the first block of the <tt>Function</tt>.</p></li>
   3847 
   3848   <li><tt><a href="#Type">Type</a> *getReturnType()</tt><br>
   3849     <tt><a href="#FunctionType">FunctionType</a> *getFunctionType()</tt>
   3850 
   3851     <p>This traverses the <a href="#Type"><tt>Type</tt></a> of the
   3852     <tt>Function</tt> and returns the return type of the function, or the <a
   3853     href="#FunctionType"><tt>FunctionType</tt></a> of the actual
   3854     function.</p></li>
   3855 
   3856   <li><tt><a href="#SymbolTable">SymbolTable</a> *getSymbolTable()</tt>
   3857 
   3858     <p> Return a pointer to the <a href="#SymbolTable"><tt>SymbolTable</tt></a>
   3859     for this <tt>Function</tt>.</p></li>
   3860 </ul>
   3861 
   3862 </div>
   3863 
   3864 </div>
   3865 
   3866 <!-- ======================================================================= -->
   3867 <h3>
   3868   <a name="GlobalVariable">The <tt>GlobalVariable</tt> class</a>
   3869 </h3>
   3870 
   3871 <div>
   3872 
   3873 <p><tt>#include "<a
   3874 href="/doxygen/GlobalVariable_8h-source.html">llvm/GlobalVariable.h</a>"</tt>
   3875 <br>
   3876 doxygen info: <a href="/doxygen/classllvm_1_1GlobalVariable.html">GlobalVariable
   3877  Class</a><br>
   3878 Superclasses: <a href="#GlobalValue"><tt>GlobalValue</tt></a>, 
   3879 <a href="#Constant"><tt>Constant</tt></a>,
   3880 <a href="#User"><tt>User</tt></a>,
   3881 <a href="#Value"><tt>Value</tt></a></p>
   3882 
   3883 <p>Global variables are represented with the (surprise surprise)
   3884 <tt>GlobalVariable</tt> class. Like functions, <tt>GlobalVariable</tt>s are also
   3885 subclasses of <a href="#GlobalValue"><tt>GlobalValue</tt></a>, and as such are
   3886 always referenced by their address (global values must live in memory, so their
   3887 "name" refers to their constant address). See 
   3888 <a href="#GlobalValue"><tt>GlobalValue</tt></a> for more on this.  Global 
   3889 variables may have an initial value (which must be a 
   3890 <a href="#Constant"><tt>Constant</tt></a>), and if they have an initializer, 
   3891 they may be marked as "constant" themselves (indicating that their contents 
   3892 never change at runtime).</p>
   3893 
   3894 <!-- _______________________________________________________________________ -->
   3895 <h4>
   3896   <a name="m_GlobalVariable">
   3897     Important Public Members of the <tt>GlobalVariable</tt> class
   3898   </a>
   3899 </h4>
   3900 
   3901 <div>
   3902 
   3903 <ul>
   3904   <li><tt>GlobalVariable(const </tt><tt><a href="#Type">Type</a> *Ty, bool
   3905   isConstant, LinkageTypes&amp; Linkage, <a href="#Constant">Constant</a>
   3906   *Initializer = 0, const std::string &amp;Name = "", Module* Parent = 0)</tt>
   3907 
   3908     <p>Create a new global variable of the specified type. If
   3909     <tt>isConstant</tt> is true then the global variable will be marked as
   3910     unchanging for the program. The Linkage parameter specifies the type of
   3911     linkage (internal, external, weak, linkonce, appending) for the variable.
   3912     If the linkage is InternalLinkage, WeakAnyLinkage, WeakODRLinkage,
   3913     LinkOnceAnyLinkage or LinkOnceODRLinkage, then the resultant
   3914     global variable will have internal linkage.  AppendingLinkage concatenates
   3915     together all instances (in different translation units) of the variable
   3916     into a single variable but is only applicable to arrays.  See
   3917     the <a href="LangRef.html#modulestructure">LLVM Language Reference</a> for
   3918     further details on linkage types. Optionally an initializer, a name, and the
   3919     module to put the variable into may be specified for the global variable as
   3920     well.</p></li>
   3921 
   3922   <li><tt>bool isConstant() const</tt>
   3923 
   3924     <p>Returns true if this is a global variable that is known not to
   3925     be modified at runtime.</p></li>
   3926 
   3927   <li><tt>bool hasInitializer()</tt>
   3928 
   3929     <p>Returns true if this <tt>GlobalVariable</tt> has an initializer.</p></li>
   3930 
   3931   <li><tt><a href="#Constant">Constant</a> *getInitializer()</tt>
   3932 
   3933     <p>Returns the initial value for a <tt>GlobalVariable</tt>.  It is not legal
   3934     to call this method if there is no initializer.</p></li>
   3935 </ul>
   3936 
   3937 </div>
   3938 
   3939 </div>
   3940 
   3941 <!-- ======================================================================= -->
   3942 <h3>
   3943   <a name="BasicBlock">The <tt>BasicBlock</tt> class</a>
   3944 </h3>
   3945 
   3946 <div>
   3947 
   3948 <p><tt>#include "<a
   3949 href="/doxygen/BasicBlock_8h-source.html">llvm/BasicBlock.h</a>"</tt><br>
   3950 doxygen info: <a href="/doxygen/classllvm_1_1BasicBlock.html">BasicBlock
   3951 Class</a><br>
   3952 Superclass: <a href="#Value"><tt>Value</tt></a></p>
   3953 
   3954 <p>This class represents a single entry single exit section of the code,
   3955 commonly known as a basic block by the compiler community.  The
   3956 <tt>BasicBlock</tt> class maintains a list of <a
   3957 href="#Instruction"><tt>Instruction</tt></a>s, which form the body of the block.
   3958 Matching the language definition, the last element of this list of instructions
   3959 is always a terminator instruction (a subclass of the <a
   3960 href="#TerminatorInst"><tt>TerminatorInst</tt></a> class).</p>
   3961 
   3962 <p>In addition to tracking the list of instructions that make up the block, the
   3963 <tt>BasicBlock</tt> class also keeps track of the <a
   3964 href="#Function"><tt>Function</tt></a> that it is embedded into.</p>
   3965 
   3966 <p>Note that <tt>BasicBlock</tt>s themselves are <a
   3967 href="#Value"><tt>Value</tt></a>s, because they are referenced by instructions
   3968 like branches and can go in the switch tables. <tt>BasicBlock</tt>s have type
   3969 <tt>label</tt>.</p>
   3970 
   3971 <!-- _______________________________________________________________________ -->
   3972 <h4>
   3973   <a name="m_BasicBlock">
   3974     Important Public Members of the <tt>BasicBlock</tt> class
   3975   </a>
   3976 </h4>
   3977 
   3978 <div>
   3979 <ul>
   3980 
   3981 <li><tt>BasicBlock(const std::string &amp;Name = "", </tt><tt><a
   3982  href="#Function">Function</a> *Parent = 0)</tt>
   3983 
   3984 <p>The <tt>BasicBlock</tt> constructor is used to create new basic blocks for
   3985 insertion into a function.  The constructor optionally takes a name for the new
   3986 block, and a <a href="#Function"><tt>Function</tt></a> to insert it into.  If
   3987 the <tt>Parent</tt> parameter is specified, the new <tt>BasicBlock</tt> is
   3988 automatically inserted at the end of the specified <a
   3989 href="#Function"><tt>Function</tt></a>; if not specified, the BasicBlock must be
   3990 manually inserted into the <a href="#Function"><tt>Function</tt></a>.</p></li>
   3991 
   3992 <li><tt>BasicBlock::iterator</tt> - Typedef for instruction list iterator<br>
   3993 <tt>BasicBlock::const_iterator</tt> - Typedef for const_iterator.<br>
   3994 <tt>begin()</tt>, <tt>end()</tt>, <tt>front()</tt>, <tt>back()</tt>,
   3995 <tt>size()</tt>, <tt>empty()</tt> -
   3996 STL-style functions for accessing the instruction list.
   3997 
   3998 <p>These methods and typedefs are forwarding functions that have the same
   3999 semantics as the standard library methods of the same names.  These methods
   4000 expose the underlying instruction list of a basic block in a way that is easy to
   4001 manipulate.  To get the full complement of container operations (including
   4002 operations to update the list), you must use the <tt>getInstList()</tt>
   4003 method.</p></li>
   4004 
   4005 <li><tt>BasicBlock::InstListType &amp;getInstList()</tt>
   4006 
   4007 <p>This method is used to get access to the underlying container that actually
   4008 holds the Instructions.  This method must be used when there isn't a forwarding
   4009 function in the <tt>BasicBlock</tt> class for the operation that you would like
   4010 to perform.  Because there are no forwarding functions for "updating"
   4011 operations, you need to use this if you want to update the contents of a
   4012 <tt>BasicBlock</tt>.</p></li>
   4013 
   4014 <li><tt><a href="#Function">Function</a> *getParent()</tt>
   4015 
   4016 <p> Returns a pointer to the <a href="#Function"><tt>Function</tt></a> the block is
   4017 embedded into, or a null pointer if it is homeless.</p></li>
   4018 
   4019 <li><tt><a href="#TerminatorInst">TerminatorInst</a> *getTerminator()</tt>
   4020 
   4021 <p> Returns a pointer to the terminator instruction that appears at the end of
   4022 the <tt>BasicBlock</tt>.  If there is no terminator instruction, or if the last
   4023 instruction in the block is not a terminator, then a null pointer is
   4024 returned.</p></li>
   4025 
   4026 </ul>
   4027 
   4028 </div>
   4029 
   4030 </div>
   4031 
   4032 <!-- ======================================================================= -->
   4033 <h3>
   4034   <a name="Argument">The <tt>Argument</tt> class</a>
   4035 </h3>
   4036 
   4037 <div>
   4038 
   4039 <p>This subclass of Value defines the interface for incoming formal
   4040 arguments to a function. A Function maintains a list of its formal
   4041 arguments. An argument has a pointer to the parent Function.</p>
   4042 
   4043 </div>
   4044 
   4045 </div>
   4046 
   4047 <!-- *********************************************************************** -->
   4048 <hr>
   4049 <address>
   4050   <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
   4051   src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
   4052   <a href="http://validator.w3.org/check/referer"><img
   4053   src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01 Strict"></a>
   4054 
   4055   <a href="mailto:dhurjati@cs.uiuc.edu">Dinakar Dhurjati</a> and
   4056   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   4057   <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
   4058   Last modified: $Date: 2011-10-11 02:33:56 -0400 (Tue, 11 Oct 2011) $
   4059 </address>
   4060 
   4061 </body>
   4062 </html>
   4063