diff options
Diffstat (limited to 'docs')
68 files changed, 17181 insertions, 12834 deletions
diff --git a/docs/AddressSanitizer.html b/docs/AddressSanitizer.html deleted file mode 100644 index 397eafc..0000000 --- a/docs/AddressSanitizer.html +++ /dev/null @@ -1,171 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<!-- Material used from: HTML 4.01 specs: http://www.w3.org/TR/html401/ --> -<html> -<head> - <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> - <title>AddressSanitizer, a fast memory error detector</title> - <link type="text/css" rel="stylesheet" href="../menu.css"> - <link type="text/css" rel="stylesheet" href="../content.css"> - <style type="text/css"> - td { - vertical-align: top; - } - </style> -</head> -<body> - -<!--#include virtual="../menu.html.incl"--> - -<div id="content"> - -<h1>AddressSanitizer</h1> -<ul> - <li> <a href="#intro">Introduction</a> - <li> <a href="#howtobuild">How to Build</a> - <li> <a href="#usage">Usage</a> - <ul><li> <a href="#has_feature">__has_feature(address_sanitizer)</a></ul> - <ul><li> <a href="#no_address_safety_analysis"> - __attribute__((no_address_safety_analysis))</a></ul> - <li> <a href="#platforms">Supported Platforms</a> - <li> <a href="#limitations">Limitations</a> - <li> <a href="#status">Current Status</a> - <li> <a href="#moreinfo">More Information</a> -</ul> - -<h2 id="intro">Introduction</h2> -AddressSanitizer is a fast memory error detector. -It consists of a compiler instrumentation module and a run-time library. -The tool can detect the following types of bugs: -<ul> <li> Out-of-bounds accesses to heap, stack and globals - <li> Use-after-free - <li> Use-after-return (to some extent) - <li> Double-free, invalid free -</ul> -Typical slowdown introduced by AddressSanitizer is <b>2x</b>. - -<h2 id="howtobuild">How to build</h2> -Follow the <a href="../get_started.html">clang build instructions</a>. 
-CMake build is supported.<BR> - -<h2 id="usage">Usage</h2> -Simply compile and link your program with <tt>-fsanitize=address</tt> flag. <BR> -The AddressSanitizer run-time library should be linked to the final executable, -so make sure to use <tt>clang</tt> (not <tt>ld</tt>) for the final link step.<BR> -When linking shared libraries, the AddressSanitizer run-time is not linked, -so <tt>-Wl,-z,defs</tt> may cause link errors (don't use it with AddressSanitizer). <BR> - -To get a reasonable performance add <tt>-O1</tt> or higher. <BR> -To get nicer stack traces in error messages add -<tt>-fno-omit-frame-pointer</tt>. <BR> -To get perfect stack traces you may need to disable inlining (just use <tt>-O1</tt>) and tail call -elimination (<tt>-fno-optimize-sibling-calls</tt>). - -<pre> -% cat example_UseAfterFree.cc -int main(int argc, char **argv) { - int *array = new int[100]; - delete [] array; - return array[argc]; // BOOM -} -</pre> - -<pre> -# Compile and link -% clang -O1 -g -fsanitize=address -fno-omit-frame-pointer example_UseAfterFree.cc -</pre> -OR -<pre> -# Compile -% clang -O1 -g -fsanitize=address -fno-omit-frame-pointer -c example_UseAfterFree.cc -# Link -% clang -g -fsanitize=address example_UseAfterFree.o -</pre> - -If a bug is detected, the program will print an error message to stderr and exit with a -non-zero exit code. -Currently, AddressSanitizer does not symbolize its output, so you may need to use a -separate script to symbolize the result offline (this will be fixed in future). 
-<pre> -% ./a.out 2> log -% projects/compiler-rt/lib/asan/scripts/asan_symbolize.py / < log | c++filt -==9442== ERROR: AddressSanitizer heap-use-after-free on address 0x7f7ddab8c084 at pc 0x403c8c bp 0x7fff87fb82d0 sp 0x7fff87fb82c8 -READ of size 4 at 0x7f7ddab8c084 thread T0 - #0 0x403c8c in main example_UseAfterFree.cc:4 - #1 0x7f7ddabcac4d in __libc_start_main ??:0 -0x7f7ddab8c084 is located 4 bytes inside of 400-byte region [0x7f7ddab8c080,0x7f7ddab8c210) -freed by thread T0 here: - #0 0x404704 in operator delete[](void*) ??:0 - #1 0x403c53 in main example_UseAfterFree.cc:4 - #2 0x7f7ddabcac4d in __libc_start_main ??:0 -previously allocated by thread T0 here: - #0 0x404544 in operator new[](unsigned long) ??:0 - #1 0x403c43 in main example_UseAfterFree.cc:2 - #2 0x7f7ddabcac4d in __libc_start_main ??:0 -==9442== ABORTING -</pre> - -AddressSanitizer exits on the first detected error. This is by design. -One reason: it makes the generated code smaller and faster (both by ~5%). -Another reason: this makes fixing bugs unavoidable. With Valgrind, it is often -the case that users treat Valgrind warnings as false positives -(which they are not) and don't fix them. - - -<h3 id="has_feature">__has_feature(address_sanitizer)</h3> -In some cases one may need to execute different code depending on whether -AddressSanitizer is enabled. -<a href="LanguageExtensions.html#__has_feature_extension">__has_feature</a> -can be used for this purpose. -<pre> -#if defined(__has_feature) -# if __has_feature(address_sanitizer) - code that builds only under AddressSanitizer -# endif -#endif -</pre> - -<h3 id="no_address_safety_analysis">__attribute__((no_address_safety_analysis))</h3> -Some code should not be instrumented by AddressSanitizer. -One may use the function attribute -<a href="LanguageExtensions.html#address_sanitizer"> - <tt>no_address_safety_analysis</tt></a> -to disable instrumentation of a particular function. 
-This attribute may not be supported by other compilers, so we suggest to -use it together with <tt>__has_feature(address_sanitizer)</tt>. -Note: currently, this attribute will be lost if the function is inlined. - -<h2 id="platforms">Supported Platforms</h2> -AddressSanitizer is supported on -<ul><li>Linux i386/x86_64 (tested on Ubuntu 10.04 and 12.04). -<li>MacOS 10.6, 10.7 and 10.8 (i386/x86_64). -</ul> -Support for Linux ARM (and Android ARM) is in progress -(it may work, but is not guaranteed too). - - -<h2 id="limitations">Limitations</h2> -<ul> -<li> AddressSanitizer uses more real memory than a native run. -Exact overhead depends on the allocations sizes. The smaller the -allocations you make the bigger the overhead is. -<li> AddressSanitizer uses more stack memory. We have seen up to 3x increase. -<li> On 64-bit platforms AddressSanitizer maps (but not reserves) -16+ Terabytes of virtual address space. -This means that tools like <tt>ulimit</tt> may not work as usually expected. -<li> Static linking is not supported. -</ul> - - -<h2 id="status">Current Status</h2> -AddressSanitizer is fully functional on supported platforms starting from LLVM 3.1. -The test suite is integrated into CMake build and can be run with -<tt>make check-asan</tt> command. - -<h2 id="moreinfo">More Information</h2> -<a href="http://code.google.com/p/address-sanitizer/">http://code.google.com/p/address-sanitizer</a>. - - -</div> -</body> -</html> diff --git a/docs/AddressSanitizer.rst b/docs/AddressSanitizer.rst new file mode 100644 index 0000000..89e8644 --- /dev/null +++ b/docs/AddressSanitizer.rst @@ -0,0 +1,163 @@ +================ +AddressSanitizer +================ + +.. contents:: + :local: + +Introduction +============ + +AddressSanitizer is a fast memory error detector. It consists of a compiler +instrumentation module and a run-time library. 
The tool can detect the +following types of bugs: + +* Out-of-bounds accesses to heap, stack and globals +* Use-after-free +* Use-after-return (to some extent) +* Double-free, invalid free + +Typical slowdown introduced by AddressSanitizer is **2x**. + +How to build +============ + +Follow the `clang build instructions <../get_started.html>`_. CMake build is +supported. + +Usage +===== + +Simply compile and link your program with ``-fsanitize=address`` flag. The +AddressSanitizer run-time library should be linked to the final executable, so +make sure to use ``clang`` (not ``ld``) for the final link step. When linking +shared libraries, the AddressSanitizer run-time is not linked, so +``-Wl,-z,defs`` may cause link errors (don't use it with AddressSanitizer). To +get a reasonable performance add ``-O1`` or higher. To get nicer stack traces +in error messages add ``-fno-omit-frame-pointer``. To get perfect stack traces +you may need to disable inlining (just use ``-O1``) and tail call elimination +(``-fno-optimize-sibling-calls``). + +.. code-block:: console + + % cat example_UseAfterFree.cc + int main(int argc, char **argv) { + int *array = new int[100]; + delete [] array; + return array[argc]; // BOOM + } + + # Compile and link + % clang -O1 -g -fsanitize=address -fno-omit-frame-pointer example_UseAfterFree.cc + +or: + +.. code-block:: console + + # Compile + % clang -O1 -g -fsanitize=address -fno-omit-frame-pointer -c example_UseAfterFree.cc + # Link + % clang -g -fsanitize=address example_UseAfterFree.o + +If a bug is detected, the program will print an error message to stderr and +exit with a non-zero exit code. Currently, AddressSanitizer does not symbolize +its output, so you may need to use a separate script to symbolize the result +offline (this will be fixed in future). + +.. 
code-block:: console + + % ./a.out 2> log + % projects/compiler-rt/lib/asan/scripts/asan_symbolize.py / < log | c++filt + ==9442== ERROR: AddressSanitizer heap-use-after-free on address 0x7f7ddab8c084 at pc 0x403c8c bp 0x7fff87fb82d0 sp 0x7fff87fb82c8 + READ of size 4 at 0x7f7ddab8c084 thread T0 + #0 0x403c8c in main example_UseAfterFree.cc:4 + #1 0x7f7ddabcac4d in __libc_start_main ??:0 + 0x7f7ddab8c084 is located 4 bytes inside of 400-byte region [0x7f7ddab8c080,0x7f7ddab8c210) + freed by thread T0 here: + #0 0x404704 in operator delete[](void*) ??:0 + #1 0x403c53 in main example_UseAfterFree.cc:4 + #2 0x7f7ddabcac4d in __libc_start_main ??:0 + previously allocated by thread T0 here: + #0 0x404544 in operator new[](unsigned long) ??:0 + #1 0x403c43 in main example_UseAfterFree.cc:2 + #2 0x7f7ddabcac4d in __libc_start_main ??:0 + ==9442== ABORTING + +AddressSanitizer exits on the first detected error. This is by design. +One reason: it makes the generated code smaller and faster (both by +~5%). Another reason: this makes fixing bugs unavoidable. With Valgrind, +it is often the case that users treat Valgrind warnings as false +positives (which they are not) and don't fix them. + +``__has_feature(address_sanitizer)`` +------------------------------------ + +In some cases one may need to execute different code depending on whether +AddressSanitizer is enabled. +:ref:`\_\_has\_feature <langext-__has_feature-__has_extension>` can be used for +this purpose. + +.. code-block:: c + + #if defined(__has_feature) + # if __has_feature(address_sanitizer) + // code that builds only under AddressSanitizer + # endif + #endif + +``__attribute__((no_sanitize_address))`` +----------------------------------------------- + +Some code should not be instrumented by AddressSanitizer. One may use the +function attribute +:ref:`no_sanitize_address <langext-address_sanitizer>` +(or a deprecated synonym ``no_address_safety_analysis``) +to disable instrumentation of a particular function. 
This attribute may not be +supported by other compilers, so we suggest using it together with +``__has_feature(address_sanitizer)``. Note: currently, this attribute will be +lost if the function is inlined. + +Initialization order checking +----------------------------- + +AddressSanitizer can optionally detect dynamic initialization order problems, +when initialization of globals defined in one translation unit uses +globals defined in another translation unit. To enable this check at runtime, +you should set the environment variable +``ASAN_OPTIONS=check_initialization_order=1``. + +Supported Platforms +=================== + +AddressSanitizer is supported on + +* Linux i386/x86\_64 (tested on Ubuntu 10.04 and 12.04); +* MacOS 10.6, 10.7 and 10.8 (i386/x86\_64). + +Support for Linux ARM (and Android ARM) is in progress (it may work, but +is not guaranteed to). + +Limitations +=========== + +* AddressSanitizer uses more real memory than a native run. Exact overhead + depends on the allocation sizes. The smaller the allocations you make the + bigger the overhead is. +* AddressSanitizer uses more stack memory. We have seen up to 3x increase. +* On 64-bit platforms AddressSanitizer maps (but does not reserve) 16+ Terabytes of + virtual address space. This means that tools like ``ulimit`` may not work as + usually expected. +* Static linking is not supported. + +Current Status +============== + +AddressSanitizer is fully functional on supported platforms starting from LLVM +3.1. The test suite is integrated into CMake build and can be run with ``make +check-asan`` command. 
+ +More Information +================ + +`http://code.google.com/p/address-sanitizer <http://code.google.com/p/address-sanitizer/>`_ + diff --git a/docs/AnalyzerRegions.html b/docs/AnalyzerRegions.html deleted file mode 100644 index f9d3337..0000000 --- a/docs/AnalyzerRegions.html +++ /dev/null @@ -1,260 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> -<title>Static Analyzer Design Document: Memory Regions</title> -</head> -<body> - -<h1>Static Analyzer Design Document: Memory Regions</h1> - -<h3>Authors</h3> - -<p>Ted Kremenek, <tt>kremenek at apple</tt><br> -Zhongxing Xu, <tt>xuzhongzhing at gmail</tt></p> - -<h2 id="intro">Introduction</h2> - -<p>The path-sensitive analysis engine in libAnalysis employs an extensible API -for abstractly modeling the memory of an analyzed program. This API employs the -concept of "memory regions" to abstractly model chunks of program memory such as -program variables and dynamically allocated memory such as those returned from -'malloc' and 'alloca'. Regions are hierarchical, with subregions modeling -subtyping relationships, field and array offsets into larger chunks of memory, -and so on.</p> - -<p>The region API consists of two components:</p> - -<ul> <li>A taxonomy and representation of regions themselves within the analyzer -engine. The primary definitions and interfaces are described in <tt><a -href="http://clang.llvm.org/doxygen/MemRegion_8h-source.html">MemRegion.h</a></tt>. -At the root of the region hierarchy is the class <tt>MemRegion</tt> with -specific subclasses refining the region concept for variables, heap allocated -memory, and so forth.</li> <li>The modeling of binding of values to regions. For -example, modeling the value stored to a local variable <tt>x</tt> consists of -recording the binding between the region for <tt>x</tt> (which represents the -raw memory associated with <tt>x</tt>) and the value stored to <tt>x</tt>. 
This -binding relationship is captured with the notion of "symbolic -stores."</li> </ul> - -<p>Symbolic stores, which can be thought of as representing the relation -<tt>regions -> values</tt>, are implemented by subclasses of the -<tt>StoreManager</tt> class (<tt><a -href="http://clang.llvm.org/doxygen/Store_8h-source.html">Store.h</a></tt>). A -particular StoreManager implementation has complete flexibility concerning the -following: - -<ul> -<li><em>How</em> to model the binding between regions and values</li> -<li><em>What</em> bindings are recorded -</ul> - -<p>Together, both points allow different StoreManagers to tradeoff between -different levels of analysis precision and scalability concerning the reasoning -of program memory. Meanwhile, the core path-sensitive engine makes no -assumptions about either points, and queries a StoreManager about the bindings -to a memory region through a generic interface that all StoreManagers share. If -a particular StoreManager cannot reason about the potential bindings of a given -memory region (e.g., '<tt>BasicStoreManager</tt>' does not reason about fields -of structures) then the StoreManager can simply return 'unknown' (represented by -'<tt>UnknownVal</tt>') for a particular region-binding. This separation of -concerns not only isolates the core analysis engine from the details of -reasoning about program memory but also facilities the option of a client of the -path-sensitive engine to easily swap in different StoreManager implementations -that internally reason about program memory in very different ways.</p> - -<p>The rest of this document is divided into two parts. We first discuss region -taxonomy and the semantics of regions. 
We then discuss the StoreManager -interface, and details of how the currently available StoreManager classes -implement region bindings.</p> - -<h2 id="regions">Memory Regions and Region Taxonomy</h2> - -<h3>Pointers</h3> - -<p>Before talking about the memory regions, we would talk about the pointers -since memory regions are essentially used to represent pointer values.</p> - -<p>The pointer is a type of values. Pointer values have two semantic aspects. -One is its physical value, which is an address or location. The other is the -type of the memory object residing in the address.</p> - -<p>Memory regions are designed to abstract these two properties of the pointer. -The physical value of a pointer is represented by MemRegion pointers. The rvalue -type of the region corresponds to the type of the pointee object.</p> - -<p>One complication is that we could have different view regions on the same -memory chunk. They represent the same memory location, but have different -abstract location, i.e., MemRegion pointers. Thus we need to canonicalize the -abstract locations to get a unique abstract location for one physical -location.</p> - -<p>Furthermore, these different view regions may or may not represent memory -objects of different types. Some different types are semantically the same, -for example, 'struct s' and 'my_type' are the same type.</p> - -<pre> -struct s; -typedef struct s my_type; -</pre> - -<p>But <tt>char</tt> and <tt>int</tt> are not the same type in the code below:</p> - -<pre> -void *p; -int *q = (int*) p; -char *r = (char*) p; -</pre> - -<p>Thus we need to canonicalize the MemRegion which is used in binding and -retrieving.</p> - -<h3>Regions</h3> -<p>Region is the entity used to model pointer values. A Region has the following -properties:</p> - -<ul> -<li>Kind</li> - -<li>ObjectType: the type of the object residing on the region.</li> - -<li>LocationType: the type of the pointer value that the region corresponds to. 
- Usually this is the pointer to the ObjectType. But sometimes we want to cache - this type explicitly, for example, for a CodeTextRegion.</li> - -<li>StartLocation</li> - -<li>EndLocation</li> -</ul> - -<h3>Symbolic Regions</h3> - -<p>A symbolic region is a map of the concept of symbolic values into the domain -of regions. It is the way that we represent symbolic pointers. Whenever a -symbolic pointer value is needed, a symbolic region is created to represent -it.</p> - -<p>A symbolic region has no type. It wraps a SymbolData. But sometimes we have -type information associated with a symbolic region. For this case, a -TypedViewRegion is created to layer the type information on top of the symbolic -region. The reason we do not carry type information with the symbolic region is -that the symbolic regions can have no type. To be consistent, we don't let them -to carry type information.</p> - -<p>Like a symbolic pointer, a symbolic region may be NULL, has unknown extent, -and represents a generic chunk of memory.</p> - -<p><em><b>NOTE</b>: We plan not to use loc::SymbolVal in RegionStore and remove it - gradually.</em></p> - -<p>Symbolic regions get their rvalue types through the following ways:</p> - -<ul> -<li>Through the parameter or global variable that points to it, e.g.: -<pre> -void f(struct s* p) { - ... -} -</pre> - -<p>The symbolic region pointed to by <tt>p</tt> has type <tt>struct -s</tt>.</p></li> - -<li>Through explicit or implicit casts, e.g.: -<pre> -void f(void* p) { - struct s* q = (struct s*) p; - ... -} -</pre> -</li> -</ul> - -<p>We attach the type information to the symbolic region lazily. For the first -case above, we create the <tt>TypedViewRegion</tt> only when the pointer is -actually used to access the pointee memory object, that is when the element or -field region is created. 
For the cast case, the <tt>TypedViewRegion</tt> is -created when visiting the <tt>CastExpr</tt>.</p> - -<p>The reason for doing lazy typing is that symbolic regions are sometimes only -used to do location comparison.</p> - -<h3>Pointer Casts</h3> - -<p>Pointer casts allow people to impose different 'views' onto a chunk of -memory.</p> - -<p>Usually we have two kinds of casts. One kind of casts cast down with in the -type hierarchy. It imposes more specific views onto more generic memory regions. -The other kind of casts cast up with in the type hierarchy. It strips away more -specific views on top of the more generic memory regions.</p> - -<p>We simulate the down casts by layering another <tt>TypedViewRegion</tt> on -top of the original region. We simulate the up casts by striping away the top -<tt>TypedViewRegion</tt>. Down casts is usually simple. For up casts, if the -there is no <tt>TypedViewRegion</tt> to be stripped, we return the original -region. If the underlying region is of the different type than the cast-to type, -we flag an error state.</p> - -<p>For toll-free bridging casts, we return the original region.</p> - -<p>We can set up a partial order for pointer types, with the most general type -<tt>void*</tt> at the top. The partial order forms a tree with <tt>void*</tt> as -its root node.</p> - -<p>Every <tt>MemRegion</tt> has a root position in the type tree. For example, -the pointee region of <tt>void *p</tt> has its root position at the root node of -the tree. <tt>VarRegion</tt> of <tt>int x</tt> has its root position at the 'int -type' node.</p> - -<p><tt>TypedViewRegion</tt> is used to move the region down or up in the tree. -Moving down in the tree adds a <tt>TypedViewRegion</tt>. Moving up in the tree -removes a <Tt>TypedViewRegion</tt>.</p> - -<p>Do we want to allow moving up beyond the root position? This happens -when:</p> <pre> int x; void *p = &x; </pre> - -<p>The region of <tt>x</tt> has its root position at 'int*' node. 
the cast to -void* moves that region up to the 'void*' node. I propose to not allow such -casts, and assign the region of <tt>x</tt> for <tt>p</tt>.</p> - -<p>Another non-ideal case is that people might cast to a non-generic pointer -from another non-generic pointer instead of first casting it back to the generic -pointer. Direct handling of this case would result in multiple layers of -TypedViewRegions. This enforces an incorrect semantic view to the region, -because we can only have one typed view on a region at a time. To avoid this -inconsistency, before casting the region, we strip the TypedViewRegion, then do -the cast. In summary, we only allow one layer of TypedViewRegion.</p> - -<h3>Region Bindings</h3> - -<p>The following region kinds are boundable: VarRegion, CompoundLiteralRegion, -StringRegion, ElementRegion, FieldRegion, and ObjCIvarRegion.</p> - -<p>When binding regions, we perform canonicalization on element regions and field -regions. This is because we can have different views on the same region, some -of which are essentially the same view with different sugar type names.</p> - -<p>To canonicalize a region, we get the canonical types for all TypedViewRegions -along the way up to the root region, and make new TypedViewRegions with those -canonical types.</p> - -<p>For Objective-C and C++, perhaps another canonicalization rule should be -added: for FieldRegion, the least derived class that has the field is used as -the type of the super region of the FieldRegion.</p> - -<p>All bindings and retrievings are done on the canonicalized regions.</p> - -<p>Canonicalization is transparent outside the region store manager, and more -specifically, unaware outside the Bind() and Retrieve() method. We don't need to -consider region canonicalization when doing pointer cast.</p> - -<h3>Constraint Manager</h3> - -<p>The constraint manager reasons about the abstract location of memory objects. 
-We can have different views on a region, but none of these views changes the -location of that object. Thus we should get the same abstract location for those -regions.</p> - -</body> -</html> diff --git a/docs/AutomaticReferenceCounting.html b/docs/AutomaticReferenceCounting.html deleted file mode 100644 index 5354f8a..0000000 --- a/docs/AutomaticReferenceCounting.html +++ /dev/null @@ -1,2226 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> -<title>Objective-C Automatic Reference Counting (ARC)</title> -<link type="text/css" rel="stylesheet" href="../menu.css"> -<link type="text/css" rel="stylesheet" href="../content.css"> -<style type="text/css"> -/* Collapse the items in the ToC to the left. */ -div#toc ul { - padding-left: 0 -} - -/* Rationales appear in italic. */ -div.rationale { - font-style: italic -} - -div.rationale em { - font-style: normal -} - -/* Revisions are also italicized. */ -span.revision { - font-style: italic -} - -span.whenRevised { - font-weight: bold; - font-style: normal -} - -div h1 { font-size: 2em; margin: .67em 0 } -div div h1 { font-size: 1.5em; margin: .75em 0 } -div div div h1 { font-size: 1.17em; margin: .83em 0 } -div div div div h1 { margin: 1.12em 0 } - -span.term { font-style: italic; font-weight: bold } -</style> - -<script type="text/javascript"> -/// A little script to recursively build a table of contents. -function buildTOC(div, toc, ancestry) { - var children = div.childNodes; - var len = children.length; - - var childNumber = 0; - - var list = null; - for (var i = 0; i < len; ++i) { - var child = children[i]; - if (child.nodeName != "DIV") continue; - if (child.getAttribute("class") == "rationale") continue; - if (child.id == "toc") continue; - - // Okay, we're actually going to build a list node. 
- if (list === null) list = document.createElement("ul"); - - var childAncestry = ancestry + ++childNumber + "."; - - var headerNode = child.childNodes[1]; - var title = headerNode.innerHTML; - headerNode.insertBefore(document.createTextNode(childAncestry + " "), - headerNode.firstChild); - - var item = document.createElement("li"); - item.appendChild(document.createTextNode(childAncestry + " ")); - - var anchor = document.createElement("a"); - anchor.href = "#" + child.id; - anchor.innerHTML = title; - item.appendChild(anchor); - - buildTOC(child, item, childAncestry); - - list.appendChild(item); - } - if (list) toc.appendChild(list); -} - -function onLoad() { - var toc = document.getElementById("toc"); - var content = document.getElementById("content"); - buildTOC(content, toc, ""); -} -window.onload = onLoad; - -</script> -</head> -<body> - -<!--#include virtual="../menu.html.incl"--> - -<div id="content"> -<h1>Automatic Reference Counting</h1> - -<div id="toc"> -</div> - -<div id="meta"> -<h1>About this document</h1> - -<div id="meta.purpose"> -<h1>Purpose</h1> - -<p>The first and primary purpose of this document is to serve as a -complete technical specification of Automatic Reference Counting. -Given a core Objective-C compiler and runtime, it should be possible -to write a compiler and runtime which implements these new -semantics.</p> - -<p>The secondary purpose is to act as a rationale for why ARC was -designed in this way. This should remain tightly focused on the -technical design and should not stray into marketing speculation.</p> - -</div> <!-- meta.purpose --> - -<div id="meta.background"> -<h1>Background</h1> - -<p>This document assumes a basic familiarity with C.</p> - -<p><span class="term">Blocks</span> are a C language extension for -creating anonymous functions. Users interact with and transfer block -objects using <span class="term">block pointers</span>, which are -represented like a normal pointer. 
A block may capture values from -local variables; when this occurs, memory must be dynamically -allocated. The initial allocation is done on the stack, but the -runtime provides a <tt>Block_copy</tt> function which, given a block -pointer, either copies the underlying block object to the heap, -setting its reference count to 1 and returning the new block pointer, -or (if the block object is already on the heap) increases its -reference count by 1. The paired function is <tt>Block_release</tt>, -which decreases the reference count by 1 and destroys the object if -the count reaches zero and is on the heap.</p> - -<p>Objective-C is a set of language extensions, significant enough to -be considered a different language. It is a strict superset of C. -The extensions can also be imposed on C++, producing a language called -Objective-C++. The primary feature is a single-inheritance object -system; we briefly describe the modern dialect.</p> - -<p>Objective-C defines a new type kind, collectively called -the <span class="term">object pointer types</span>. This kind has two -notable builtin members, <tt>id</tt> and <tt>Class</tt>; <tt>id</tt> -is the final supertype of all object pointers. The validity of -conversions between object pointer types is not checked at runtime. -Users may define <span class="term">classes</span>; each class is a -type, and the pointer to that type is an object pointer type. A class -may have a superclass; its pointer type is a subtype of its -superclass's pointer type. A class has a set -of <span class="term">ivars</span>, fields which appear on all -instances of that class. For every class <i>T</i> there's an -associated metaclass; it has no fields, its superclass is the -metaclass of <i>T</i>'s superclass, and its metaclass is a global -class. 
Every class has a global object whose class is the -class's metaclass; metaclasses have no associated type, so pointers to -this object have type <tt>Class</tt>.</p> - -<p>A class declaration (<tt>@interface</tt>) declares a set -of <span class="term">methods</span>. A method has a return type, a -list of argument types, and a <span class="term">selector</span>: a -name like <tt>foo:bar:baz:</tt>, where the number of colons -corresponds to the number of formal arguments. A method may be an -instance method, in which case it can be invoked on objects of the -class, or a class method, in which case it can be invoked on objects -of the metaclass. A method may be invoked by providing an object -(called the <span class="term">receiver</span>) and a list of formal -arguments interspersed with the selector, like so:</p> - -<pre>[receiver foo: fooArg bar: barArg baz: bazArg]</pre> - -<p>This looks in the dynamic class of the receiver for a method with -this name, then in that class's superclass, etc., until it finds -something it can execute. The receiver <q>expression</q> may also be -the name of a class, in which case the actual receiver is the class -object for that class, or (within method definitions) it may -be <tt>super</tt>, in which case the lookup algorithm starts with the -static superclass instead of the dynamic class. The actual methods -dynamically found in a class are not those declared in the -<tt>@interface</tt>, but those defined in a separate -<tt>@implementation</tt> declaration; however, when compiling a -call, typechecking is done based on the methods declared in the -<tt>@interface</tt>.</p> - -<p>Method declarations may also be grouped into -<span class="term">protocols</span>, which are not inherently -associated with any class, but which classes may claim to follow. 
-Object pointer types may be qualified with additional protocols that -the object is known to support.</p> - -<p><span class="term">Class extensions</span> are collections of ivars -and methods, designed to allow a class's <tt>@interface</tt> to be -split across multiple files; however, there is still a primary -implementation file which must see the <tt>@interface</tt>s of all -class extensions. -<span class="term">Categories</span> allow methods (but not ivars) to -be declared <i>post hoc</i> on an arbitrary class; the methods in the -category's <tt>@implementation</tt> will be dynamically added to that -class's method tables which the category is loaded at runtime, -replacing those methods in case of a collision.</p> - -<p>In the standard environment, objects are allocated on the heap, and -their lifetime is manually managed using a reference count. This is -done using two instance methods which all classes are expected to -implement: <tt>retain</tt> increases the object's reference count by -1, whereas <tt>release</tt> decreases it by 1 and calls the instance -method <tt>dealloc</tt> if the count reaches 0. To simplify certain -operations, there is also an <span class="term">autorelease -pool</span>, a thread-local list of objects to call <tt>release</tt> -on later; an object can be added to this pool by -calling <tt>autorelease</tt> on it.</p> - -<p>Block pointers may be converted to type <tt>id</tt>; block objects -are laid out in a way that makes them compatible with Objective-C -objects. 
There is a builtin class that all block objects are -considered to be objects of; this class implements <tt>retain</tt> by -adjusting the reference count, not by calling <tt>Block_copy</tt>.</p> - -</div> <!-- meta.background --> - -<div id="meta.evolution"> -<h1>Evolution</h1> - -<p>ARC is under continual evolution, and this document must be updated -as the language progresses.</p> - -<p>If a change increases the expressiveness of the language, for -example by lifting a restriction or by adding new syntax, the change -will be annotated with a revision marker, like so:</p> - -<blockquote> - ARC applies to Objective-C pointer types, block pointer types, and - <span class="revision"><span class="whenRevised">[beginning Apple - 8.0, LLVM 3.8]</span> BPTRs declared within <code>extern - "BCPL"</code> blocks</span>. -</blockquote> - -<p>For now, it is sensible to version this document by the releases of -its sole implementation (and its host project), clang. -<q>LLVM X.Y</q> refers to an open-source release of clang from the -LLVM project. <q>Apple X.Y</q> refers to an Apple-provided release of -the Apple LLVM Compiler. Other organizations that prepare their own, -separately-versioned clang releases and wish to maintain similar -information in this document should send requests to cfe-dev.</p> - -<p>If a change decreases the expressiveness of the language, for -example by imposing a new restriction, this should be taken as an -oversight in the original specification and something to be avoided -in all versions. Such changes are generally to be avoided.</p> - -</div> <!-- meta.evolution --> - -</div> <!-- meta --> - -<div id="general"> -<h1>General</h1> - -<p>Automatic Reference Counting implements automatic memory management -for Objective-C objects and blocks, freeing the programmer from the -need to explicitly insert retains and releases. 
It does not provide a -cycle collector; users must explicitly manage the lifetime of their -objects, breaking cycles manually or with weak or unsafe -references.</p> - -<p>ARC may be explicitly enabled with the compiler -flag <tt>-fobjc-arc</tt>. It may also be explicitly disabled with the -compiler flag <tt>-fno-objc-arc</tt>. The last of these two flags -appearing on the compile line <q>wins</q>.</p> - -<p>If ARC is enabled, <tt>__has_feature(objc_arc)</tt> will expand to -1 in the preprocessor. For more information about <tt>__has_feature</tt>, -see the <a href="LanguageExtensions.html#__has_feature_extension">language -extensions</a> document.</p> - -</div> <!-- general --> - -<div id="objects"> -<h1>Retainable object pointers</h1> - -<p>This section describes retainable object pointers, their basic -operations, and the restrictions imposed on their use under ARC. Note -in particular that it covers the rules for pointer <em>values</em> -(patterns of bits indicating the location of a pointed-to object), not -pointer -<em>objects</em> (locations in memory which store pointer values). -The rules for objects are covered in the next section.</p> - -<p>A <span class="term">retainable object pointer</span> -(or <q>retainable pointer</q>) is a value of -a <span class="term">retainable object pointer type</span> -(<q>retainable type</q>). 
There are three kinds of retainable object -pointer types:</p> -<ul> -<li>block pointers (formed by applying the caret (<tt>^</tt>) -declarator sigil to a function type)</li> -<li>Objective-C object pointers (<tt>id</tt>, <tt>Class</tt>, <tt>NSFoo*</tt>, etc.)</li> -<li>typedefs marked with <tt>__attribute__((NSObject))</tt></li> -</ul> - -<p>Other pointer types, such as <tt>int*</tt> and <tt>CFStringRef</tt>, -are not subject to ARC's semantics and restrictions.</p> - -<div class="rationale"> - -<p>Rationale: We are not at liberty to require -all code to be recompiled with ARC; therefore, ARC must interoperate -with Objective-C code which manages retains and releases manually. In -general, there are three requirements in order for a -compiler-supported reference-count system to provide reliable -interoperation:</p> - -<ul> -<li>The type system must reliably identify which objects are to be -managed. An <tt>int*</tt> might be a pointer to a <tt>malloc</tt>'ed -array, or it might be an interior pointer to such an array, or it might -point to some field or local variable. In contrast, values of the -retainable object pointer types are never interior.</li> -<li>The type system must reliably indicate how to -manage objects of a type. This usually means that the type must imply -a procedure for incrementing and decrementing retain counts. -Supporting single-ownership objects requires a lot more explicit -mediation in the language.</li> -<li>There must be reliable conventions for whether and -when <q>ownership</q> is passed between caller and callee, for both -arguments and return values. Objective-C methods follow such a -convention very reliably, at least for system libraries on Mac OS X, -and functions always pass objects at +0. The C-based APIs for Core -Foundation objects, on the other hand, have much more varied transfer -semantics.</li> -</ul> -</div> <!-- rationale --> - -<p>The use of <tt>__attribute__((NSObject))</tt> typedefs is not -recommended. 
If it's absolutely necessary to use this attribute, be -very explicit about using the typedef, and do not assume that it will -be preserved by language features like <tt>__typeof</tt> and C++ -template argument substitution.</p> - -<div class="rationale"><p>Rationale: any compiler operation which -incidentally strips type <q>sugar</q> from a type will yield a type -without the attribute, which may result in unexpected -behavior.</p></div> - -<div id="objects.retains"> -<h1>Retain count semantics</h1> - -<p>A retainable object pointer is either a <span class="term">null -pointer</span> or a pointer to a valid object. Furthermore, if it has -block pointer type and is not <tt>null</tt> then it must actually be a -pointer to a block object, and if it has <tt>Class</tt> type (possibly -protocol-qualified) then it must actually be a pointer to a class -object. Otherwise ARC does not enforce the Objective-C type system as -long as the implementing methods follow the signature of the static -type. It is undefined behavior if ARC is exposed to an invalid -pointer.</p> - -<p>For ARC's purposes, a valid object is one with <q>well-behaved</q> -retaining operations. Specifically, the object must be laid out such -that the Objective-C message send machinery can successfully send it -the following messages:</p> - -<ul> -<li><tt>retain</tt>, taking no arguments and returning a pointer to -the object.</li> -<li><tt>release</tt>, taking no arguments and returning <tt>void</tt>.</li> -<li><tt>autorelease</tt>, taking no arguments and returning a pointer -to the object.</li> -</ul> - -<p>The behavior of these methods is constrained in the following ways. -The term <span class="term">high-level semantics</span> is an -intentionally vague term; the intent is that programmers must -implement these methods in a way such that the compiler, modifying -code in ways it deems safe according to these constraints, will not -violate their requirements. 
For example, if the user puts logging -statements in <tt>retain</tt>, they should not be surprised if those -statements are executed more or less often depending on optimization -settings. These constraints are not exhaustive of the optimization -opportunities: values held in local variables are subject to -additional restrictions, described later in this document.</p> - -<p>It is undefined behavior if a computation history featuring a send -of <tt>retain</tt> followed by a send of <tt>release</tt> to the same -object, with no intervening <tt>release</tt> on that object, is not -equivalent under the high-level semantics to a computation -history in which these sends are removed. Note that this implies that -these methods may not raise exceptions.</p> - -<p>It is undefined behavior if a computation history features any use -whatsoever of an object following the completion of a send -of <tt>release</tt> that is not preceded by a send of <tt>retain</tt> -to the same object.</p> - -<p>The behavior of <tt>autorelease</tt> must be equivalent to sending -<tt>release</tt> when one of the autorelease pools currently in scope -is popped. It may not throw an exception.</p> - -<p>When the semantics call for performing one of these operations on a -retainable object pointer, if that pointer is <tt>null</tt> then the -effect is a no-op.</p> - -<p>All of the semantics described in this document are subject to -additional <a href="#optimization">optimization rules</a> which permit -the removal or optimization of operations based on local knowledge of -data flow. 
The semantics describe the high-level behaviors that the -compiler implements, not an exact sequence of operations that a -program will be compiled into.</p> - -</div> <!-- objects.retains --> - -<div id="objects.operands"> -<h1>Retainable object pointers as operands and arguments</h1> - -<p>In general, ARC does not perform retain or release operations when -simply using a retainable object pointer as an operand within an -expression. This includes:</p> -<ul> -<li>loading a retainable pointer from an object with non-weak -<a href="#ownership">ownership</a>,</li> -<li>passing a retainable pointer as an argument to a function or -method, and</li> -<li>receiving a retainable pointer as the result of a function or -method call.</li> -</ul> - -<div class="rationale"><p>Rationale: while this might seem -uncontroversial, it is actually unsafe when multiple expressions are -evaluated in <q>parallel</q>, as with binary operators and calls, -because (for example) one expression might load from an object while -another writes to it. However, C and C++ already call this undefined -behavior because the evaluations are unsequenced, and ARC simply -exploits that here to avoid needing to retain arguments across a large -number of calls.</p></div> - -<p>The remainder of this section describes exceptions to these rules, -how those exceptions are detected, and what those exceptions imply -semantically.</p> - -<div id="objects.operands.consumed"> -<h1>Consumed parameters</h1> - -<p>A function or method parameter of retainable object pointer type -may be marked as <span class="term">consumed</span>, signifying that -the callee expects to take ownership of a +1 retain count. This is -done by adding the <tt>ns_consumed</tt> attribute to the parameter -declaration, like so:</p> - -<pre>void foo(__attribute((ns_consumed)) id x); -- (void) foo: (id) __attribute((ns_consumed)) x;</pre> - -<p>This attribute is part of the type of the function or method, not -the type of the parameter. 
It controls only how the argument is -passed and received.</p> - -<p>When passing such an argument, ARC retains the argument prior to -making the call.</p> - -<p>When receiving such an argument, ARC releases the argument at the -end of the function, subject to the usual optimizations for local -values.</p> - -<div class="rationale"><p>Rationale: this formalizes direct transfers -of ownership from a caller to a callee. The most common scenario here -is passing the <tt>self</tt> parameter to <tt>init</tt>, but it is -useful to generalize. Typically, local optimization will remove any -extra retains and releases: on the caller side the retain will be -merged with a +1 source, and on the callee side the release will be -rolled into the initialization of the parameter.</p></div> - -<p>The implicit <tt>self</tt> parameter of a method may be marked as -consumed by adding <tt>__attribute__((ns_consumes_self))</tt> to the -method declaration. Methods in the <tt>init</tt> -<a href="#family">family</a> are treated as if they were implicitly -marked with this attribute.</p> - -<p>It is undefined behavior if an Objective-C message send to a method -with <tt>ns_consumed</tt> parameters (other than self) is made with a -null receiver. It is undefined behavior if the method to which an -Objective-C message send statically resolves has a different set -of <tt>ns_consumed</tt> parameters than the method it dynamically -resolves to. It is undefined behavior if a block or function call is -made through a static type with a different set of <tt>ns_consumed</tt> -parameters than the implementation of the called block or function.</p> - -<div class="rationale"><p>Rationale: consumed parameters with null -receiver are a guaranteed leak. Mismatches with consumed parameters -will cause over-retains or over-releases, depending on the direction.
-The rule about function calls is really just an application of the -existing C/C++ rule about calling functions through an incompatible -function type, but it's useful to state it explicitly.</p></div> - -</div> <!-- objects.operands.consumed --> - -<div id="objects.operands.retained-returns"> -<h1>Retained return values</h1> - -<p>A function or method which returns a retainable object pointer type -may be marked as returning a retained value, signifying that the -caller expects to take ownership of a +1 retain count. This is done -by adding the <tt>ns_returns_retained</tt> attribute to the function or -method declaration, like so:</p> - -<pre>id foo(void) __attribute((ns_returns_retained)); -- (id) foo __attribute((ns_returns_retained));</pre> - -<p>This attribute is part of the type of the function or method.</p> - -<p>When returning from such a function or method, ARC retains the -value at the point of evaluation of the return statement, before -leaving all local scopes.</p> - -<p>When receiving a return result from such a function or method, ARC -releases the value at the end of the full-expression it is contained -within, subject to the usual optimizations for local values.</p> - -<div class="rationale"><p>Rationale: this formalizes direct transfers of -ownership from a callee to a caller. The most common scenario this -models is the retained return from <tt>init</tt>, <tt>alloc</tt>, -<tt>new</tt>, and <tt>copy</tt> methods, but there are other cases in -the frameworks. After optimization there are typically no extra -retains and releases required.</p></div> - -<p>Methods in -the <tt>alloc</tt>, <tt>copy</tt>, <tt>init</tt>, <tt>mutableCopy</tt>, -and <tt>new</tt> <a href="#family">families</a> are implicitly marked -<tt>__attribute__((ns_returns_retained))</tt>. 
This may be suppressed -by explicitly marking the -method <tt>__attribute__((ns_returns_not_retained))</tt>.</p> - -<p>It is undefined behavior if the method to which an Objective-C -message send statically resolves has different retain semantics on its -result from the method it dynamically resolves to. It is undefined -behavior if a block or function call is made through a static type -with different retain semantics on its result from the implementation -of the called block or function.</p> - -<div class="rationale"><p>Rationale: Mismatches with returned results -will cause over-retains or over-releases, depending on the direction. -Again, the rule about function calls is really just an application of -the existing C/C++ rule about calling functions through an -incompatible function type.</p></div> - -</div> <!-- objects.operands.retained-returns --> - -<div id="objects.operands.other-returns"> -<h1>Unretained return values</h1> - -<p>A method or function which returns a retainable object type but -does not return a retained value must ensure that the object is -still valid across the return boundary.</p> - -<p>When returning from such a function or method, ARC retains the -value at the point of evaluation of the return statement, then leaves -all local scopes, and then balances out the retain while ensuring that -the value lives across the call boundary. In the worst case, this may -involve an <tt>autorelease</tt>, but callers must not assume that the -value is actually in the autorelease pool.</p> - -<p>ARC performs no extra mandatory work on the caller side, although -it may elect to do something to shorten the lifetime of the returned -value.</p> - -<div class="rationale"><p>Rationale: it is common in non-ARC code to not -return an autoreleased value; therefore the convention does not force -either path. 
It is convenient to not be required to do unnecessary -retains and autoreleases; this permits optimizations such as eliding -retain/autoreleases when it can be shown that the original pointer -will still be valid at the point of return.</p></div> - -<p>A method or function may be marked -with <tt>__attribute__((ns_returns_autoreleased))</tt> to indicate -that it returns a pointer which is guaranteed to be valid at least as -long as the innermost autorelease pool. There are no additional -semantics enforced in the definition of such a method; it merely -enables optimizations in callers.</p> - -</div> <!-- objects.operands.other-returns --> - -<div id="objects.operands.casts"> -<h1>Bridged casts</h1> - -<p>A <span class="term">bridged cast</span> is a C-style cast -annotated with one of three keywords:</p> - -<ul> -<li><tt>(__bridge T) op</tt> casts the operand to the destination -type <tt>T</tt>. If <tt>T</tt> is a retainable object pointer type, -then <tt>op</tt> must have a non-retainable pointer type. -If <tt>T</tt> is a non-retainable pointer type, then <tt>op</tt> must -have a retainable object pointer type. Otherwise the cast is -ill-formed. There is no transfer of ownership, and ARC inserts -no retain operations.</li> - -<li><tt>(__bridge_retained T) op</tt> casts the operand, which must -have retainable object pointer type, to the destination type, which -must be a non-retainable pointer type. ARC retains the value, subject -to the usual optimizations on local values, and the recipient is -responsible for balancing that +1.</li> - -<li><tt>(__bridge_transfer T) op</tt> casts the operand, which must -have non-retainable pointer type, to the destination type, which must -be a retainable object pointer type. 
ARC will release the value at -the end of the enclosing full-expression, subject to the usual -optimizations on local values.</li> -</ul> - -<p>These casts are required in order to transfer objects in and out of -ARC control; see the rationale in the section -on <a href="#objects.restrictions.conversion">conversion of retainable -object pointers</a>.</p> - -<p>Using a <tt>__bridge_retained</tt> or <tt>__bridge_transfer</tt> -cast purely to convince ARC to emit an unbalanced retain or release, -respectively, is poor form.</p> - -</div> <!-- objects.operands.casts --> - -</div> <!-- objects.operands --> - -<div id="objects.restrictions"> -<h1>Restrictions</h1> - -<div id="objects.restrictions.conversion"> -<h1>Conversion of retainable object pointers</h1> - -<p>In general, a program which attempts to implicitly or explicitly -convert a value of retainable object pointer type to any -non-retainable type, or vice-versa, is ill-formed. For example, an -Objective-C object pointer shall not be converted to <tt>void*</tt>. -As an exception, cast to <tt>intptr_t</tt> is allowed because such -casts are not transferring ownership. The <a href="#objects.operands.casts">bridged -casts</a> may be used to perform these conversions where -necessary.</p> - -<div class="rationale"><p>Rationale: we cannot ensure the correct -management of the lifetime of objects if they may be freely passed -around as unmanaged types. 
The bridged casts are provided so that the -programmer may explicitly describe whether the cast transfers control -into or out of ARC.</p></div> - -<p>However, the following exceptions apply.</p> - -</div> <!-- objects.restrictions.conversion --> - -<div id="objects.restrictions.conversion-exception-known"> -<h1>Conversion to retainable object pointer type of - expressions with known semantics</h1> - -<p><span class="revision"><span class="whenRevised">[beginning Apple - 4.0, LLVM 3.1]</span> These exceptions have been greatly expanded; - they previously applied only to a much-reduced subset which is - difficult to categorize but which included null pointers, message - sends (under the given rules), and the various global constants.</span></p> - -<p>An unbridged conversion to a retainable object pointer type from a -type other than a retainable object pointer type is ill-formed, as -discussed above, unless the operand of the cast has a syntactic form -which is known retained, known unretained, or known -retain-agnostic.</p> - -<p>An expression is <span class="term">known retain-agnostic</span> if -it is:</p> -<ul> -<li>an Objective-C string literal,</li> -<li>a load from a <tt>const</tt> system global variable of -<a href="#misc.c-retainable">C retainable pointer type</a>, or</li> -<li>a null pointer constant.</li> -</ul> - -<p>An expression is <span class="term">known unretained</span> if it -is an rvalue of <a href="#misc.c-retainable">C retainable -pointer type</a> and it is:</p> -<ul> -<li>a direct call to a function, and either that function has the - <tt>cf_returns_not_retained</tt> attribute or it is an - <a href="#misc.c-retainable.audit">audited</a> function that does not - have the <tt>cf_returns_retained</tt> attribute and does not follow - the create/copy naming convention,</li> -<li>a message send, and the declared method either has - the <tt>cf_returns_not_retained</tt> attribute or it has neither - the <tt>cf_returns_retained</tt> attribute nor a - 
<a href="#family">selector family</a> that implies a retained - result.</li> -</ul> - -<p>An expression is <span class="term">known retained</span> if it is -an rvalue of <a href="#misc.c-retainable">C retainable pointer type</a> -and it is:</p> -<ul> -<li>a message send, and the declared method either has the - <tt>cf_returns_retained</tt> attribute, or it does not have - the <tt>cf_returns_not_retained</tt> attribute but it does have a - <a href="#family">selector family</a> that implies a retained - result.</li> -</ul> - -<p>Furthermore:</p> -<ul> -<li>a comma expression is classified according to its right-hand side,</li> -<li>a statement expression is classified according to its result -expression, if it has one,</li> -<li>an lvalue-to-rvalue conversion applied to an Objective-C property -lvalue is classified according to the underlying message send, and</li> -<li>a conditional operator is classified according to its second and -third operands, if they agree in classification, or else the other -if one is known retain-agnostic.</li> -</ul> - -<p>If the cast operand is known retained, the conversion is treated as -a <tt>__bridge_transfer</tt> cast. If the cast operand is known -unretained or known retain-agnostic, the conversion is treated as -a <tt>__bridge</tt> cast.</p> - -<div class="rationale"><p>Rationale: Bridging casts are annoying. -Absent the ability to completely automate the management of CF -objects, however, we are left with relatively poor attempts to reduce -the need for a glut of explicit bridges. Hence these rules.</p> - -<p>We've so far consciously refrained from implicitly turning retained -CF results from function calls into <tt>__bridge_transfer</tt> casts. 
-The worry is that some code patterns — for example, creating a -CF value, assigning it to an ObjC-typed local, and then -calling <tt>CFRelease</tt> when done — are a bit too likely to -be accidentally accepted, leading to mysterious behavior.</p></div> - -</div> <!-- objects.restrictions.conversion-exception-known --> - -<div id="objects.restrictions.conversion-exception-contextual"> -<h1>Conversion from retainable object pointer type in certain contexts</h1> - -<p><span class="revision"><span class="whenRevised">[beginning Apple - 4.0, LLVM 3.1]</span></span></p> - -<p>If an expression of retainable object pointer type is explicitly -cast to a <a href="#misc.c-retainable">C retainable pointer type</a>, -the program is ill-formed as discussed above unless the result is -immediately used:</p> - -<ul> -<li>to initialize a parameter in an Objective-C message send where the -parameter is not marked with the <tt>cf_consumed</tt> attribute, or</li> -<li>to initialize a parameter in a direct call to -an <a href="#misc.c-retainable.audit">audited</a> function where the -parameter is not marked with the <tt>cf_consumed</tt> attribute.</li> -</ul> - -<div class="rationale"><p>Rationale: Consumed parameters are left out -because ARC would naturally balance them with a retain, which was -judged too treacherous. 
This is in part because several of the most -common consuming functions are in the <tt>Release</tt> family, and it -would be quite unfortunate for explicit releases to be silently -balanced out in this way.</p></div> - -</div> <!-- objects.restrictions.conversion-exception-contextual --> - -</div> <!-- objects.restrictions --> - -</div> <!-- objects --> - -<div id="ownership"> -<h1>Ownership qualification</h1> - -<p>This section describes the behavior of <em>objects</em> of -retainable object pointer type; that is, locations in memory which -store retainable object pointers.</p> - -<p>A type is a <span class="term">retainable object owner type</span> -if it is a retainable object pointer type or an array type whose -element type is a retainable object owner type.</p> - -<p>An <span class="term">ownership qualifier</span> is a type -qualifier which applies only to retainable object owner types. An array type is -ownership-qualified according to its element type, and adding an ownership -qualifier to an array type so qualifies its element type.</p> - -<p>A program is ill-formed if it attempts to apply an ownership qualifier -to a type which is already ownership-qualified, even if it is the same -qualifier. There is a single exception to this rule: an ownership qualifier -may be applied to a substituted template type parameter, which overrides the -ownership qualifier provided by the template argument.</p> - -<p>Except as described under -the <a href="#ownership.inference">inference rules</a>, a program is -ill-formed if it attempts to form a pointer or reference type to a -retainable object owner type which lacks an ownership qualifier.</p> - -<div class="rationale"><p>Rationale: these rules, together with the -inference rules, ensure that all objects and lvalues of retainable -object pointer type have an ownership qualifier. 
The ability to override an ownership qualifier during template substitution is required to counteract the <a href="#ownership.inference.template_arguments">inference of <tt>__strong</tt> for template type arguments</a>. </p></div> - -<p>There are four ownership qualifiers:</p> - -<ul> -<li><tt>__autoreleasing</tt></li> -<li><tt>__strong</tt></li> -<li><tt>__unsafe_unretained</tt></li> -<li><tt>__weak</tt></li> -</ul> - -<p>A type is <span class="term">nontrivially ownership-qualified</span> -if it is qualified with <tt>__autoreleasing</tt>, <tt>__strong</tt>, or -<tt>__weak</tt>.</p> - -<div id="ownership.spelling"> -<h1>Spelling</h1> - -<p>The names of the ownership qualifiers are reserved for the -implementation. A program may not assume that they are or are not -implemented with macros, or what those macros expand to.</p> - -<p>An ownership qualifier may be written anywhere that any other type -qualifier may be written.</p> - -<p>If an ownership qualifier appears in -the <i>declaration-specifiers</i>, the following rules apply:</p> - -<ul> -<li>if the type specifier is a retainable object owner type, the -qualifier applies to that type;</li> -<li>if the outermost non-array part of the declarator is a pointer or -block pointer, the qualifier applies to that type;</li> -<li>otherwise the program is ill-formed.</li> -</ul> - -<p>If an ownership qualifier appears on the declarator name, or on the -declared object, it is applied to the outermost pointer or block-pointer -type.</p> - -<p>If an ownership qualifier appears anywhere else in a declarator, it -applies to the type there.</p> - -<div id="ownership.spelling.property"> -<h1>Property declarations</h1> - -<p>A property of retainable object pointer type may have ownership. -If the property's type is ownership-qualified, then the property has -that ownership. If the property has one of the following modifiers, -then the property has the corresponding ownership.
A property is -ill-formed if it has conflicting sources of ownership, or if it has -redundant ownership modifiers, or if it has <tt>__autoreleasing</tt> -ownership.</p> - -<ul> -<li><tt>assign</tt> implies <tt>__unsafe_unretained</tt> ownership.</li> -<li><tt>copy</tt> implies <tt>__strong</tt> ownership, as well as the - usual behavior of copy semantics on the setter.</li> -<li><tt>retain</tt> implies <tt>__strong</tt> ownership.</li> -<li><tt>strong</tt> implies <tt>__strong</tt> ownership.</li> -<li><tt>unsafe_unretained</tt> implies <tt>__unsafe_unretained</tt> - ownership.</li> -<li><tt>weak</tt> implies <tt>__weak</tt> ownership.</li> -</ul> - -<p>With the exception of <tt>weak</tt>, these modifiers are available -in non-ARC modes.</p> - -<p>A property's specified ownership is preserved in its metadata, but -otherwise the meaning is purely conventional unless the property is -synthesized. If a property is synthesized, then the -<span class="term">associated instance variable</span> is the -instance variable which is named, possibly implicitly, by the -<tt>@synthesize</tt> declaration. If the associated instance variable -already exists, then its ownership qualification must equal the -ownership of the property; otherwise, the instance variable is created -with that ownership qualification.</p> - -<p>A property of retainable object pointer type which is synthesized -without a source of ownership has the ownership of its associated -instance variable, if it already exists; otherwise, -<span class="revision"><span class="whenRevised">[beginning Apple 3.1, -LLVM 3.1]</span> its ownership is implicitly <tt>strong</tt></span>. -Prior to this revision, it was ill-formed to synthesize such a -property.</p> - -<div class="rationale"><p>Rationale: using <tt>strong</tt> by default -is safe and consistent with the generic ARC rule about -<a href="#ownership.inference.variables">inferring ownership</a>. 
It -is, unfortunately, inconsistent with the non-ARC rule which states -that such properties are implicitly <tt>assign</tt>. However, that -rule is clearly untenable in ARC, since it leads to default-unsafe -code. The main merit to banning the properties is to avoid confusion -with non-ARC practice, which did not ultimately strike us as -sufficient to justify requiring extra syntax and (more importantly) -forcing novices to understand ownership rules just to declare a -property when the default is so reasonable. Changing the rule away -from non-ARC practice was acceptable because we had conservatively -banned the synthesis in order to give ourselves exactly this -leeway.</p></div> - -<p>Applying <tt>__attribute__((NSObject))</tt> to a property not of -retainable object pointer type has the same behavior it does outside -of ARC: it requires the property type to be some sort of pointer and -permits the use of modifiers other than <tt>assign</tt>. These -modifiers only affect the synthesized getter and setter; direct -accesses to the ivar (even if synthesized) still have primitive -semantics, and the value in the ivar will not be automatically -released during deallocation.</p> - -</div> <!-- ownership.spelling.property --> - -</div> <!-- ownership.spelling --> - -<div id="ownership.semantics"> -<h1>Semantics</h1> - -<p>There are five <span class="term">managed operations</span> which -may be performed on an object of retainable object pointer type. Each -qualifier specifies different semantics for each of these operations. 
-It is still undefined behavior to access an object outside of its -lifetime.</p> - -<p>A load or store with <q>primitive semantics</q> has the same -semantics as the respective operation would have on a <tt>void*</tt> -lvalue with the same alignment and non-ownership qualification.</p> - -<p><span class="term">Reading</span> occurs when performing an -lvalue-to-rvalue conversion on an object lvalue.</p> - -<ul> -<li>For <tt>__weak</tt> objects, the current pointee is retained and -then released at the end of the current full-expression. This must -execute atomically with respect to assignments and to the final -release of the pointee.</li> -<li>For all other objects, the lvalue is loaded with primitive -semantics.</li> -</ul> - -<p><span class="term">Assignment</span> occurs when evaluating -an assignment operator. The semantics vary based on the qualification:</p> -<ul> -<li>For <tt>__strong</tt> objects, the new pointee is first retained; -second, the lvalue is loaded with primitive semantics; third, the new -pointee is stored into the lvalue with primitive semantics; and -finally, the old pointee is released. This is not performed -atomically; external synchronization must be used to make this safe in -the face of concurrent loads and stores.</li> -<li>For <tt>__weak</tt> objects, the lvalue is updated to point to the -new pointee, unless the new pointee is an object currently undergoing -deallocation, in which case the lvalue is updated to a null pointer.
-This must execute atomically with respect to other assignments to the -object, to reads from the object, and to the final release of the new -pointee.</li> -<li>For <tt>__unsafe_unretained</tt> objects, the new pointee is -stored into the lvalue using primitive semantics.</li> -<li>For <tt>__autoreleasing</tt> objects, the new pointee is retained, -autoreleased, and stored into the lvalue using primitive semantics.</li> -</ul> - -<p><span class="term">Initialization</span> occurs when an object's -lifetime begins, which depends on its storage duration. -Initialization proceeds in two stages:</p> -<ol> -<li>First, a null pointer is stored into the lvalue using primitive -semantics. This step is skipped if the object -is <tt>__unsafe_unretained</tt>.</li> -<li>Second, if the object has an initializer, that expression is -evaluated and then assigned into the object using the usual assignment -semantics.</li> -</ol> - -<p><span class="term">Destruction</span> occurs when an object's -lifetime ends. In all cases it is semantically equivalent to -assigning a null pointer to the object, with the proviso that of -course the object cannot be legally read after the object's lifetime -ends.</p> - -<p><span class="term">Moving</span> occurs in specific situations -where an lvalue is <q>moved from</q>, meaning that its current pointee -will be used but the object may be left in a different (but still -valid) state. This arises with <tt>__block</tt> variables and rvalue -references in C++. For <tt>__strong</tt> lvalues, moving is equivalent -to loading the lvalue with primitive semantics, writing a null pointer -to it with primitive semantics, and then releasing the result of the -load at the end of the current full-expression. 
For all other -lvalues, moving is equivalent to reading the object.</p> - -</div> <!-- ownership.semantics --> - -<div id="ownership.restrictions"> -<h1>Restrictions</h1> - -<div id="ownership.restrictions.weak"> -<h1>Weak-unavailable types</h1> - -<p>It is explicitly permitted for Objective-C classes to not -support <tt>__weak</tt> references. It is undefined behavior to -perform an operation with weak assignment semantics with a pointer to -an Objective-C object whose class does not support <tt>__weak</tt> -references.</p> - -<div class="rationale"><p>Rationale: historically, it has been -possible for a class to provide its own reference-count implementation -by overriding <tt>retain</tt>, <tt>release</tt>, etc. However, weak -references to an object require coordination with its class's -reference-count implementation because, among other things, weak loads -and stores must be atomic with respect to the final release. -Therefore, existing custom reference-count implementations will -generally not support weak references without additional effort. This -is unavoidable without breaking binary compatibility.</p></div> - -<p>A class may indicate that it does not support weak references by -providing the <tt>objc_arc_weak_unavailable</tt> attribute on the -class's interface declaration. A retainable object pointer type -is <span class="term">weak-unavailable</span> if it is a pointer to an -(optionally protocol-qualified) Objective-C class <tt>T</tt> -where <tt>T</tt> or one of its superclasses has -the <tt>objc_arc_weak_unavailable</tt> attribute.
A program is -ill-formed if it applies the <tt>__weak</tt> ownership qualifier to a -weak-unavailable type or if the value operand of a weak assignment -operation has a weak-unavailable type.</p> -</div> <!-- ownership.restrictions.weak --> - -<div id="ownership.restrictions.autoreleasing"> -<h1>Storage duration of <tt>__autoreleasing</tt> objects</h1> - -<p>A program is ill-formed if it declares an <tt>__autoreleasing</tt> -object of non-automatic storage duration. A program is ill-formed -if it captures an <tt>__autoreleasing</tt> object in a block or, -unless by reference, in a C++11 lambda.</p> - -<div class="rationale"><p>Rationale: autorelease pools are tied to the -current thread and scope by their nature. While it is possible to -have temporary objects whose instance variables are filled with -autoreleased objects, there is no way that ARC can provide any sort of -safety guarantee there.</p></div> - -<p>It is undefined behavior if a non-null pointer is assigned to -an <tt>__autoreleasing</tt> object while an autorelease pool is in -scope and then that object is read after the autorelease pool's scope -is left.</p> - -</div> - -<div id="ownership.restrictions.conversion.indirect"> -<h1>Conversion of pointers to ownership-qualified types</h1> - -<p>A program is ill-formed if an expression of type <tt>T*</tt> is -converted, explicitly or implicitly, to the type <tt>U*</tt>, -where <tt>T</tt> and <tt>U</tt> have different ownership -qualification, unless:</p> -<ul> -<li><tt>T</tt> is qualified with <tt>__strong</tt>, - <tt>__autoreleasing</tt>, or <tt>__unsafe_unretained</tt>, and - <tt>U</tt> is qualified with both <tt>const</tt> and - <tt>__unsafe_unretained</tt>; or</li> -<li>either <tt>T</tt> or <tt>U</tt> is <tt>cv void</tt>, where -<tt>cv</tt> is an optional sequence of non-ownership qualifiers; or</li> -<li>the conversion is requested with a <tt>reinterpret_cast</tt> in - Objective-C++; or</li> -<li>the conversion is a -well-formed <a 
href="#ownership.restrictions.pass_by_writeback">pass-by-writeback</a>.</li> -</ul> - -<p>The analogous rule applies to <tt>T&</tt> and <tt>U&</tt> in -Objective-C++.</p> - -<div class="rationale"><p>Rationale: these rules provide a reasonable -level of type-safety for indirect pointers, as long as the underlying -memory is not deallocated. The conversion to <tt>const -__unsafe_unretained</tt> is permitted because the semantics of reads -are equivalent across all these ownership semantics, and that's a very -useful and common pattern. The interconversion with <tt>void*</tt> is -useful for allocating memory or otherwise escaping the type system, -but use it carefully. <tt>reinterpret_cast</tt> is considered to be -an obvious enough sign of taking responsibility for any -problems.</p></div> - -<p>It is undefined behavior to access an ownership-qualified object -through an lvalue of a differently-qualified type, except that any -non-<tt>__weak</tt> object may be read through -an <tt>__unsafe_unretained</tt> lvalue.</p> - -<p>It is undefined behavior if a managed operation is performed on -a <tt>__strong</tt> or <tt>__weak</tt> object without a guarantee that -it contains a primitive zero bit-pattern, or if the storage for such -an object is freed or reused without the object being first assigned a -null pointer.</p> - -<div class="rationale"><p>Rationale: ARC cannot differentiate between -an assignment operator which is intended to <q>initialize</q> dynamic -memory and one which is intended to potentially replace a value. -Therefore the object's pointer must be valid before letting ARC at it. 
-Similarly, C and Objective-C do not provide any language hooks for -destroying objects held in dynamic memory, so it is the programmer's -responsibility to avoid leaks (<tt>__strong</tt> objects) and -consistency errors (<tt>__weak</tt> objects).</p> - -<p>These requirements are followed automatically in Objective-C++ when -creating objects of retainable object owner type with <tt>new</tt> -or <tt>new[]</tt> and destroying them with <tt>delete</tt>, -<tt>delete[]</tt>, or a pseudo-destructor expression. Note that -arrays of nontrivially-ownership-qualified type are not ABI compatible -with non-ARC code because the element type is non-POD: such arrays -that are <tt>new[]</tt>'d in ARC translation units cannot -be <tt>delete[]</tt>'d in non-ARC translation units and -vice-versa.</p></div> - -</div> - -<div id="ownership.restrictions.pass_by_writeback"> -<h1>Passing to an out parameter by writeback</h1> - -<p>If the argument passed to a parameter of type -<tt>T __autoreleasing *</tt> has type <tt>U oq *</tt>, -where <tt>oq</tt> is an ownership qualifier, then the argument is a -candidate for <span class="term">pass-by-writeback</span> if:</p> - -<ul> -<li><tt>oq</tt> is <tt>__strong</tt> or <tt>__weak</tt>, and</li> -<li>it would be legal to initialize a <tt>T __strong *</tt> with -a <tt>U __strong *</tt>.</li> -</ul> - -<p>For purposes of overload resolution, an implicit conversion -sequence requiring a pass-by-writeback is always worse than an -implicit conversion sequence not requiring a pass-by-writeback.</p> - -<p>The pass-by-writeback is ill-formed if the argument expression does -not have a legal form:</p> - -<ul> -<li><tt>&var</tt>, where <tt>var</tt> is a scalar variable of -automatic storage duration with retainable object pointer type</li> -<li>a conditional expression where the second and third operands are -both legal forms</li> -<li>a cast whose operand is a legal form</li> -<li>a null pointer constant</li> -</ul> - -<div class="rationale"><p>Rationale: 
the restriction in the form of -the argument serves two purposes. First, it makes it impossible to -pass the address of an array to the argument, which serves to protect -against an otherwise serious risk of mis-inferring an <q>array</q> -argument as an out-parameter. Second, it makes it much less likely -that the user will see confusing aliasing problems due to the -implementation, below, where their store to the writeback temporary is -not immediately seen in the original argument variable.</p></div> - -<p>A pass-by-writeback is evaluated as follows:</p> -<ol> -<li>The argument is evaluated to yield a pointer <tt>p</tt> of - type <tt>U oq *</tt>.</li> -<li>If <tt>p</tt> is a null pointer, then a null pointer is passed as - the argument, and no further work is required for the pass-by-writeback.</li> -<li>Otherwise, a temporary of type <tt>T __autoreleasing</tt> is - created and initialized to a null pointer.</li> -<li>If the parameter is not an Objective-C method parameter marked - <tt>out</tt>, then <tt>*p</tt> is read, and the result is written - into the temporary with primitive semantics.</li> -<li>The address of the temporary is passed as the argument to the - actual call.</li> -<li>After the call completes, the temporary is loaded with primitive - semantics, and that value is assigned into <tt>*p</tt>.</li> -</ol> - -<div class="rationale"><p>Rationale: this is all admittedly -convoluted. In an ideal world, we would see that a local variable is -being passed to an out-parameter and retroactively modify its type to -be <tt>__autoreleasing</tt> rather than <tt>__strong</tt>. This would -be remarkably difficult and not always well-founded under the C type -system. However, it was judged unacceptably invasive to require -programmers to write <tt>__autoreleasing</tt> on all the variables -they intend to use for out-parameters. 
This was the least bad -solution.</p></div> - -</div> - -<div id="ownership.restrictions.records"> -<h1>Ownership-qualified fields of structs and unions</h1> - -<p>A program is ill-formed if it declares a member of a C struct or -union to have a nontrivially ownership-qualified type.</p> - -<div class="rationale"><p>Rationale: the resulting type would be -non-POD in the C++ sense, but C does not give us very good language -tools for managing the lifetime of aggregates, so it is more -convenient to simply forbid them. It is still possible to manage this -with a <tt>void*</tt> or an <tt>__unsafe_unretained</tt> -object.</p></div> - -<p>This restriction does not apply in Objective-C++. However, -nontrivially ownership-qualified types are considered non-POD: in C++11 -terms, they are not trivially default constructible, copy -constructible, move constructible, copy assignable, move assignable, -or destructible. It is a violation of C++'s One Definition Rule to use -a class outside of ARC that, under ARC, would have a nontrivially -ownership-qualified member.</p> - -<div class="rationale"><p>Rationale: unlike in C, we can express all -the necessary ARC semantics for ownership-qualified subobjects as -suboperations of the (default) special member functions for the class. -These functions then become non-trivial.
This has the non-obvious -result that the class will have a non-trivial copy constructor and -non-trivial destructor; if this would not normally be true outside of -ARC, objects of the type will be passed and returned in an -ABI-incompatible manner.</p></div> - -</div> - -</div> - -<div id="ownership.inference"> -<h1>Ownership inference</h1> - -<div id="ownership.inference.variables"> -<h1>Objects</h1> - -<p>If an object is declared with retainable object owner type, but -without an explicit ownership qualifier, its type is implicitly -adjusted to have <tt>__strong</tt> qualification.</p> - -<p>As a special case, if the object's base type is <tt>Class</tt> -(possibly protocol-qualified), the type is adjusted to -have <tt>__unsafe_unretained</tt> qualification instead.</p> - -</div> - -<div id="ownership.inference.indirect_parameters"> -<h1>Indirect parameters</h1> - -<p>If a function or method parameter has type <tt>T*</tt>, where -<tt>T</tt> is an ownership-unqualified retainable object pointer type, -then:</p> - -<ul> -<li>if <tt>T</tt> is <tt>const</tt>-qualified or <tt>Class</tt>, then -it is implicitly qualified with <tt>__unsafe_unretained</tt>;</li> -<li>otherwise, it is implicitly qualified -with <tt>__autoreleasing</tt>.</li> -</ul> - -<div class="rationale"><p>Rationale: <tt>__autoreleasing</tt> exists -mostly for this case, the Cocoa convention for out-parameters. Since -a pointer to <tt>const</tt> is obviously not an out-parameter, we -instead use a type more useful for passing arrays. 
If the user -instead intends to pass in a <em>mutable</em> array, inferring -<tt>__autoreleasing</tt> is the wrong thing to do; this directs some -of the caution in the following rules about writeback.</p></div> - -<p>Such a type written anywhere else would be ill-formed by the -general rule requiring ownership qualifiers.</p> - -<p>This rule does not apply in Objective-C++ if a parameter's type is -dependent in a template pattern and is only <em>instantiated</em> to -a type which would be a pointer to an unqualified retainable object -pointer type. Such code is still ill-formed.</p> - -<div class="rationale"><p>Rationale: the convention is very unlikely -to be intentional in template code.</p></div> - -</div> <!-- ownership.inference.indirect_parameters --> - -<div id="ownership.inference.template_arguments"> -<h1>Template arguments</h1> - -<p>If a template argument for a template type parameter is a -retainable object owner type that does not have an explicit ownership -qualifier, it is adjusted to have <tt>__strong</tt> -qualification. This adjustment occurs regardless of whether the -template argument was deduced or explicitly specified. </p> - -<div class="rationale"><p>Rationale: <tt>__strong</tt> is a useful default for containers (e.g., <tt>std::vector&lt;id&gt;</tt>), which would otherwise require explicit qualification.
Moreover, unqualified retainable object pointer types are unlikely to be useful within templates, since they generally need to have a qualifier applied to them before being used.</p></div> - -</div> <!-- ownership.inference.template_arguments --> -</div> <!-- ownership.inference --> -</div> <!-- ownership --> - - -<div id="family"> -<h1>Method families</h1> - -<p>An Objective-C method may fall into a <span class="term">method -family</span>, which is a conventional set of behaviors ascribed to it -by the Cocoa conventions.</p> - -<p>A method is in a certain method family if:</p> -<ul> -<li>it has an <tt>objc_method_family</tt> attribute placing it in that - family; or if not that,</li> -<li>it does not have an <tt>objc_method_family</tt> attribute placing - it in a different or no family, and</li> -<li>its selector falls into the corresponding selector family, and</li> -<li>its signature obeys the added restrictions of the method family.</li> -</ul> - -<p>A selector is in a certain selector family if, ignoring any leading -underscores, the first component of the selector either consists -entirely of the name of the method family or it begins with that name -followed by a character other than a lowercase letter. For -example, <tt>_perform:with:</tt> and <tt>performWith:</tt> would fall -into the <tt>perform</tt> family (if we recognized one), -but <tt>performing:with</tt> would not.</p> - -<p>The families and their added restrictions are:</p> - -<ul> -<li><tt>alloc</tt> methods must return a retainable object pointer type.</li> -<li><tt>copy</tt> methods must return a retainable object pointer type.</li> -<li><tt>mutableCopy</tt> methods must return a retainable object pointer type.</li> -<li><tt>new</tt> methods must return a retainable object pointer type.</li> -<li><tt>init</tt> methods must be instance methods and must return an -Objective-C pointer type.
Additionally, a program is ill-formed if it -declares or contains a call to an <tt>init</tt> method whose return -type is neither <tt>id</tt> nor a pointer to a super-class or -sub-class of the declaring class (if the method was declared on -a class) or the static receiver type of the call (if it was declared -on a protocol). - -<div class="rationale"><p>Rationale: there are a fair number of existing -methods with <tt>init</tt>-like selectors which nonetheless don't -follow the <tt>init</tt> conventions. Typically these are either -accidental naming collisions or helper methods called during -initialization. Because of the peculiar retain/release behavior -of <tt>init</tt> methods, it's very important not to treat these -methods as <tt>init</tt> methods if they aren't meant to be. It was -felt that implicitly defining these methods out of the family based on -the exact relationship between the return type and the declaring class -would be much too subtle and fragile. Therefore we identify a small -number of legitimate-seeming return types and call everything else an -error. This serves the secondary purpose of encouraging programmers -not to accidentally give methods names in the <tt>init</tt> family.</p> - -<p>Note that a method with an <tt>init</tt>-family selector which -returns a non-Objective-C type (e.g. <tt>void</tt>) is perfectly -well-formed; it simply isn't in the <tt>init</tt> family.</p></div> -</li> -</ul> - -<p>A program is ill-formed if a method's declarations, -implementations, and overrides do not all have the same method -family.</p> - -<div id="family.attribute"> -<h1>Explicit method family control</h1> - -<p>A method may be annotated with the <tt>objc_method_family</tt> -attribute to precisely control which method family it belongs to. 
If -a method in an <tt>@implementation</tt> does not have this attribute, -but there is a method declared in the corresponding <tt>@interface</tt> -that does, then the attribute is copied to the declaration in the -<tt>@implementation</tt>. The attribute is available outside of ARC, -and may be tested for with the preprocessor query -<tt>__has_attribute(objc_method_family)</tt>.</p> - -<p>The attribute is spelled -<tt>__attribute__((objc_method_family(<i>family</i>)))</tt>. -If <i>family</i> is <tt>none</tt>, the method has no family, even if -it would otherwise be considered to have one based on its selector and -type. Otherwise, <i>family</i> must be one -of <tt>alloc</tt>, <tt>copy</tt>, <tt>init</tt>, -<tt>mutableCopy</tt>, or <tt>new</tt>, in which case the method is -considered to belong to the corresponding family regardless of its -selector. It is an error if a method that is explicitly added to a -family in this way does not meet the requirements of the family other -than the selector naming convention.</p> - -<div class="rationale"><p>Rationale: the rules codified in this document -describe the standard conventions of Objective-C. However, as these -conventions have not heretofore been enforced by an unforgiving -mechanical system, they are only imperfectly kept, especially as they -haven't always even been precisely defined. 
While it is possible to -define low-level ownership semantics with attributes like -<tt>ns_returns_retained</tt>, this attribute allows the user to -communicate semantic intent, which is of use both to ARC (which, e.g., -treats calls to <tt>init</tt> specially) and the static analyzer.</p></div> -</div> - -<div id="family.semantics"> -<h1>Semantics of method families</h1> - -<p>A method's membership in a method family may imply non-standard -semantics for its parameters and return type.</p> - -<p>Methods in the <tt>alloc</tt>, <tt>copy</tt>, <tt>mutableCopy</tt>, -and <tt>new</tt> families — that is, methods in all the -currently-defined families except <tt>init</tt> — implicitly -<a href="#objects.operands.retained_returns">return a retained -object</a> as if they were annotated with -the <tt>ns_returns_retained</tt> attribute. This can be overridden by -annotating the method with either of -the <tt>ns_returns_autoreleased</tt> or -<tt>ns_returns_not_retained</tt> attributes.</p> - -<p>Properties also follow the same naming rules as methods. This means that -those in the <tt>alloc</tt>, <tt>copy</tt>, <tt>mutableCopy</tt>, -and <tt>new</tt> families provide access to -<a href="#objects.operands.retained_returns">retained objects</a>. -This can be overridden by annotating the property with the -<tt>ns_returns_not_retained</tt> attribute.</p> - -<div id="family.semantics.init"> -<h1>Semantics of <tt>init</tt></h1> -<p>Methods in the <tt>init</tt> family implicitly -<a href="#objects.operands.consumed">consume</a> their <tt>self</tt> -parameter and <a href="#objects.operands.retained_returns">return a -retained object</a>. Neither of these properties can be altered -through attributes.</p> - -<p>A call to an <tt>init</tt> method with a receiver that is either -<tt>self</tt> (possibly parenthesized or casted) or <tt>super</tt> is -called a <span class="term">delegate init call</span>.
It is an error -for a delegate init call to be made except from an <tt>init</tt> -method, and excluding blocks within such methods.</p> - -<p>As an exception to the <a href="#misc.self">usual rule</a>, the -variable <tt>self</tt> is mutable in an <tt>init</tt> method and has -the usual semantics for a <tt>__strong</tt> variable. However, it is -undefined behavior and the program is ill-formed, no diagnostic -required, if an <tt>init</tt> method attempts to use the previous -value of <tt>self</tt> after the completion of a delegate init call. -It is conventional, but not required, for an <tt>init</tt> method to -return <tt>self</tt>.</p> - -<p>It is undefined behavior for a program to cause two or more calls -to <tt>init</tt> methods on the same object, except that -each <tt>init</tt> method invocation may perform at most one delegate -init call.</p> - -</div> <!-- family.semantics.init --> - -<div id="family.semantics.result_type"> -<h1>Related result types</h1> - -<p>Certain methods are candidates to have <span class="term">related -result types</span>:</p> -<ul> -<li>class methods in the <tt>alloc</tt> and <tt>new</tt> method families</li> -<li>instance methods in the <tt>init</tt> family</li> -<li>the instance method <tt>self</tt></li> -<li>outside of ARC, the instance methods <tt>retain</tt> and <tt>autorelease</tt></li> -</ul> - -<p>If the formal result type of such a method is <tt>id</tt> or -protocol-qualified <tt>id</tt>, or a type equal to the declaring class -or a superclass, then it is said to have a related result type.
In -this case, when invoked in an explicit message send, it is assumed to -return a type related to the type of the receiver:</p> - -<ul> -<li>if it is a class method, and the receiver is a class -name <tt>T</tt>, the message send expression has type <tt>T*</tt>; -otherwise</li> -<li>if it is an instance method, and the receiver has type <tt>T</tt>, -the message send expression has type <tt>T</tt>; otherwise</li> -<li>the message send expression has the normal result type of the -method.</li> -</ul> - -<p>This is a new rule of the Objective-C language and applies outside -of ARC.</p> - -<div class="rationale"><p>Rationale: ARC's automatic code emission is -more prone than most code to signature errors, i.e. errors where a -call was emitted against one method signature, but the implementing -method has an incompatible signature. Having more precise type -information helps drastically lower this risk, as well as catching -a number of latent bugs.</p></div> - -</div> <!-- family.semantics.result_type --> -</div> <!-- family.semantics --> -</div> <!-- family --> - -<div id="optimization"> -<h1>Optimization</h1> - -<p>ARC applies aggressive rules for the optimization of local -behavior. These rules are based around a core assumption of -<span class="term">local balancing</span>: that other code will -perform retains and releases as necessary (and only as necessary) for -its own safety, and so the optimizer does not need to consider global -properties of the retain and release sequence. For example, if a -retain and release immediately bracket a call, the optimizer can -delete the retain and release on the assumption that the called -function will not do a constant number of unmotivated releases -followed by a constant number of <q>balancing</q> retains, such that -the local retain/release pair is the only thing preventing the called -function from ending up with a dangling reference.</p> - -<p>The optimizer assumes that when a new value enters local control, -e.g. 
from a load of a non-local object or as the result of a function -call, it is instantaneously valid. Subsequently, a retain and release of -a value are necessary on a computation path only if there is a use of -that value before the release and after any operation which might -cause a release of the value (including indirectly or non-locally), -and only if the value is not demonstrably already retained.</p> - -<p>The complete optimization rules are quite complicated, but it would -still be useful to document them here.</p> - -<div id="optimization.precise"> -<h1>Precise lifetime semantics</h1> - -<p>In general, ARC maintains an invariant that a retainable object -pointer held in a <tt>__strong</tt> object will be retained for the -full formal lifetime of the object. Objects subject to this invariant -have <span class="term">precise lifetime semantics</span>.</p> - -<p>By default, local variables of automatic storage duration do not -have precise lifetime semantics. Such objects are simply strong -references which hold values of retainable object pointer type, and -these values are still fully subject to the optimizations on values -under local control.</p> - -<div class="rationale"><p>Rationale: applying these precise-lifetime -semantics strictly would be prohibitive. Many useful optimizations -that might theoretically decrease the lifetime of an object would be -rendered impossible. Essentially, it promises too much.</p></div> - -<p>A local variable of retainable object owner type and automatic -storage duration may be annotated with the <tt>objc_precise_lifetime</tt> -attribute to indicate that it should be considered to be an object -with precise lifetime semantics.</p> - -<div class="rationale"><p>Rationale: nonetheless, it is sometimes -useful to be able to force an object to be released at a precise time, -even if that object does not appear to be used.
This is likely to be -uncommon enough that the syntactic weight of explicitly requesting -these semantics will not be burdensome, and may even make the code -clearer.</p></div> - -</div> <!-- optimization.precise --> - -</div> <!-- optimization --> - -<div id="misc"> -<h1>Miscellaneous</h1> - -<div id="misc.special_methods"> -<h1>Special methods</h1> - -<div id="misc.special_methods.retain"> -<h1>Memory management methods</h1> - -<p>A program is ill-formed if it contains a method definition, message -send, or <tt>@selector</tt> expression for any of the following -selectors:</p> -<ul> -<li><tt>autorelease</tt></li> -<li><tt>release</tt></li> -<li><tt>retain</tt></li> -<li><tt>retainCount</tt></li> -</ul> - -<div class="rationale"><p>Rationale: <tt>retainCount</tt> is banned -because ARC robs it of consistent semantics. The others were banned -after weighing three options for how to deal with message sends:</p> - -<p><b>Honoring</b> them would work out very poorly if a programmer -naively or accidentally tried to incorporate code written for manual -retain/release code into an ARC program. At best, such code would do -twice as much work as necessary; quite frequently, however, ARC and -the explicit code would both try to balance the same retain, leading -to crashes. The cost is losing the ability to perform <q>unrooted</q> -retains, i.e. retains not logically corresponding to a strong -reference in the object graph.</p> - -<p><b>Ignoring</b> them would badly violate user expectations about their -code. While it <em>would</em> make it easier to develop code simultaneously -for ARC and non-ARC, there is very little reason to do so except for -certain library developers. ARC and non-ARC translation units share -an execution model and can seamlessly interoperate. 
Within a -translation unit, a developer who faithfully maintains their code in -non-ARC mode is suffering all the restrictions of ARC for zero -benefit, while a developer who isn't testing the non-ARC mode is -likely to be unpleasantly surprised if they try to go back to it.</p> - -<p><b>Banning</b> them has the disadvantage of making it very awkward -to migrate existing code to ARC. The best answer to that, given a -number of other changes and restrictions in ARC, is to provide a -specialized tool to assist users in that migration.</p> - -<p>Implementing these methods was banned because they are too integral -to the semantics of ARC; many tricks which worked tolerably under -manual reference counting will misbehave if ARC performs an ephemeral -extra retain or two. If absolutely required, it is still possible to -implement them in non-ARC code, for example in a category; the -implementations must obey the <a href="#objects.retains">semantics</a> -laid out elsewhere in this document.</p> - -</div> -</div> <!-- misc.special_methods.retain --> - -<div id="misc.special_methods.dealloc"> -<h1><tt>dealloc</tt></h1> - -<p>A program is ill-formed if it contains a message send -or <tt>@selector</tt> expression for the selector <tt>dealloc</tt>.</p> - -<div class="rationale"><p>Rationale: there are no legitimate reasons -to call <tt>dealloc</tt> directly.</p></div> - -<p>A class may provide a method definition for an instance method -named <tt>dealloc</tt>. This method will be called after the final -<tt>release</tt> of the object but before it is deallocated or any of -its instance variables are destroyed. The superclass's implementation -of <tt>dealloc</tt> will be called automatically when the method -returns.</p> - -<div class="rationale"><p>Rationale: even though ARC destroys instance -variables automatically, there are still legitimate reasons to write -a <tt>dealloc</tt> method, such as freeing non-retainable resources. 
-Failing to call <tt>[super dealloc]</tt> in such a method is nearly -always a bug. Sometimes, the object is simply trying to prevent -itself from being destroyed, but <tt>dealloc</tt> is really far too -late for the object to be raising such objections. Somewhat more -legitimately, an object may have been pool-allocated and should not be -deallocated with <tt>free</tt>; for now, this can only be supported -with a <tt>dealloc</tt> implementation outside of ARC. Such an -implementation must be very careful to do all the other work -that <tt>NSObject</tt>'s <tt>dealloc</tt> would, which is outside the -scope of this document to describe.</p></div> - -<p>The instance variables for an ARC-compiled class will be destroyed -at some point after control enters the <tt>dealloc</tt> method for the -root class of the class. The ordering of the destruction of instance -variables is unspecified, both within a single class and between -subclasses and superclasses.</p> - -<div class="rationale"><p>Rationale: the traditional, non-ARC pattern -for destroying instance variables is to destroy them immediately -before calling <tt>[super dealloc]</tt>. Unfortunately, message -sends from the superclass are quite capable of reaching methods in the -subclass, and those methods may well read or write to those instance -variables. Making such message sends from dealloc is generally -discouraged, since the subclass may well rely on other invariants that -were broken during <tt>dealloc</tt>, but it's not so inescapably -dangerous that we felt comfortable calling it undefined behavior. -Therefore we chose to delay destroying the instance variables to a -point at which message sends are clearly disallowed: the point at -which the root class's deallocation routines take over.</p> - -<p>In most code, the difference is not observable. 
It can, however, -be observed if an instance variable holds a strong reference to an -object whose deallocation will trigger a side-effect which must be -carefully ordered with respect to the destruction of the super class. -Such code violates the design principle that semantically important -behavior should be explicit. A simple fix is to clear the instance -variable manually during <tt>dealloc</tt>; a more holistic solution is -to move semantically important side-effects out of -<tt>dealloc</tt> and into a separate teardown phase which can rely on -working with well-formed objects.</p></div> - -</div> - -</div> <!-- misc.special_methods --> - -<div id="autoreleasepool"> -<h1><tt>@autoreleasepool</tt></h1> - -<p>To simplify the use of autorelease pools, and to bring them under -the control of the compiler, a new kind of statement is available in -Objective-C. It is written <tt>@autoreleasepool</tt> followed by -a <i>compound-statement</i>, i.e. by a new scope delimited by curly -braces. Upon entry to this block, the current state of the -autorelease pool is captured. When the block is exited normally, -whether by fallthrough or directed control flow (such -as <tt>return</tt> or <tt>break</tt>), the autorelease pool is -restored to the saved state, releasing all the objects in it. When -the block is exited with an exception, the pool is not drained.</p> - -<p><tt>@autoreleasepool</tt> may be used in non-ARC translation units, -with equivalent semantics.</p> - -<p>A program is ill-formed if it refers to the -<tt>NSAutoreleasePool</tt> class.</p> - -<div class="rationale"><p>Rationale: autorelease pools are clearly -important for the compiler to reason about, but it is far too much to -expect the compiler to accurately reason about control dependencies -between two calls. It is also very easy to accidentally forget to -drain an autorelease pool when using the manual API, and this can -significantly inflate the process's high-water-mark. 
The introduction -of a new scope is unfortunate but basically required for sane -interaction with the rest of the language. Not draining the pool -during an unwind is apparently required by the Objective-C exceptions -implementation.</p></div> - -</div> <!-- autoreleasepool --> - -<div id="misc.self"> -<h1><tt>self</tt></h1> - -<p>The <tt>self</tt> parameter variable of an Objective-C method is -never actually retained by the implementation. It is undefined -behavior, or at least dangerous, to cause an object to be deallocated -during a message send to that object.</p> - -<p>To make this safe, for Objective-C instance methods <tt>self</tt> is -implicitly <tt>const</tt> unless the method is in the <a -href="#family.semantics.init"><tt>init</tt> family</a>. Further, <tt>self</tt> -is <b>always</b> implicitly <tt>const</tt> within a class method.</p> - -<div class="rationale"><p>Rationale: the cost of -retaining <tt>self</tt> in all methods was found to be prohibitive, as -it tends to be live across calls, preventing the optimizer from -proving that the retain and release are unnecessary — for good -reason, as it's quite possible in theory to cause an object to be -deallocated during its execution without this retain and release. 
-Since it's extremely uncommon to actually do so, even unintentionally, -and since there's no natural way for the programmer to remove this -retain/release pair otherwise (as there is for other parameters by, -say, making the variable <tt>__unsafe_unretained</tt>), we chose to -make this optimizing assumption and shift some amount of risk to the -user.</p></div> - -</div> <!-- misc.self --> - -<div id="misc.enumeration"> -<h1>Fast enumeration iteration variables</h1> - -<p>If a variable is declared in the condition of an Objective-C fast -enumeration loop, and the variable has no explicit ownership -qualifier, then it is qualified with <tt>const __strong</tt> and -objects encountered during the enumeration are not actually -retained.</p> - -<div class="rationale"><p>Rationale: this is an optimization made -possible because fast enumeration loops promise to keep the objects -retained during enumeration, and the collection itself cannot be -synchronously modified. It can be overridden by explicitly qualifying -the variable with <tt>__strong</tt>, which will make the variable -mutable again and cause the loop to retain the objects it -encounters.</p></div> - -</div> <!-- misc.enumeration --> - -<div id="misc.blocks"> -<h1>Blocks</h1> - -<p>The implicit <tt>const</tt> capture variables created when -evaluating a block literal expression have the same ownership -semantics as the local variables they capture. The capture is -performed by reading from the captured variable and initializing the -capture variable with that value; the capture variable is destroyed -when the block literal is, i.e. 
at the end of the enclosing scope.</p> - -<p>The <a href="#ownership.inference">inference</a> rules apply -equally to <tt>__block</tt> variables, which is a shift in semantics -from non-ARC, where <tt>__block</tt> variables did not implicitly -retain during capture.</p> - -<p><tt>__block</tt> variables of retainable object owner type are -moved off the stack by initializing the heap copy with the result of -moving from the stack copy.</p> - -<p>With the exception of retains done as part of initializing -a <tt>__strong</tt> parameter variable or reading a <tt>__weak</tt> -variable, whenever these semantics call for retaining a value of -block-pointer type, it has the effect of a <tt>Block_copy</tt>. The -optimizer may remove such copies when it sees that the result is -used only as an argument to a call.</p> - -</div> <!-- misc.blocks --> - -<div id="misc.exceptions"> -<h1>Exceptions</h1> - -<p>By default in Objective C, ARC is not exception-safe for normal -releases:</p> -<ul> -<li>It does not end the lifetime of <tt>__strong</tt> variables when -their scopes are abnormally terminated by an exception.</li> -<li>It does not perform releases which would occur at the end of -a full-expression if that full-expression throws an exception.</li> -</ul> - -<p>A program may be compiled with the option -<tt>-fobjc-arc-exceptions</tt> in order to enable these, or with the -option <tt>-fno-objc-arc-exceptions</tt> to explicitly disable them, -with the last such argument <q>winning</q>.</p> - -<div class="rationale"><p>Rationale: the standard Cocoa convention is -that exceptions signal programmer error and are not intended to be -recovered from. Making code exceptions-safe by default would impose -severe runtime and code size penalties on code that typically does not -actually care about exceptions safety. Therefore, ARC-generated code -leaks by default on exceptions, which is just fine if the process is -going to be immediately terminated anyway. 
Programs which do care -about recovering from exceptions should enable the option.</p></div> - -<p>In Objective-C++, <tt>-fobjc-arc-exceptions</tt> is enabled by -default.</p> - -<div class="rationale"><p>Rationale: C++ already introduces pervasive -exceptions-cleanup code of the sort that ARC introduces. C++ -programmers who have not already disabled exceptions are much more -likely to actually require exception-safety.</p></div> - -<p>ARC does end the lifetimes of <tt>__weak</tt> objects when an -exception terminates their scope unless exceptions are disabled in the -compiler.</p> - -<div class="rationale"><p>Rationale: the consequence of a -local <tt>__weak</tt> object not being destroyed is very likely to be -corruption of the Objective-C runtime, so we want to be safer here. -Of course, potentially massive leaks are about as likely to take down -the process as this corruption is if the program does try to recover -from exceptions.</p></div> - -</div> <!-- misc.exceptions --> - -<div id="misc.interior"> -<h1>Interior pointers</h1> - -<p>An Objective-C method returning a non-retainable pointer may be -annotated with the <tt>objc_returns_inner_pointer</tt> attribute to -indicate that it returns a handle to the internal data of an object, -and that this reference will be invalidated if the object is -destroyed. When such a message is sent to an object, the object's -lifetime will be extended until at least the earliest of:</p> - -<ul> -<li>the last use of the returned pointer, or any pointer derived from -it, in the calling function or</li> -<li>the autorelease pool is restored to a previous state.</li> -</ul> - -<div class="rationale"><p>Rationale: not all memory and resources are -managed with reference counts; it is common for objects to manage -private resources in their own, private way. Typically these -resources are completely encapsulated within the object, but some -classes offer their users direct access for efficiency. 
If ARC is not -aware of methods that return such <q>interior</q> pointers, its -optimizations can cause the owning object to be reclaimed too soon. -This attribute informs ARC that it must tread lightly.</p> - -<p>The extension rules are somewhat intentionally vague. The -autorelease pool limit is there to permit a simple implementation to -simply retain and autorelease the receiver. The other limit permits -some amount of optimization. The phrase <q>derived from</q> is -intended to encompass the results both of pointer transformations, -such as casts and arithmetic, and of loading from such derived -pointers; furthermore, it applies whether or not such derivations are -applied directly in the calling code or by other utility code (for -example, the C library routine <tt>strchr</tt>). However, the -implementation never need account for uses after a return from the -code which calls the method returning an interior pointer.</p></div> - -<p>As an exception, no extension is required if the receiver is loaded -directly from a <tt>__strong</tt> object -with <a href="#optimization.precise">precise lifetime semantics</a>.</p> - -<div class="rationale"><p>Rationale: implicit autoreleases carry the -risk of significantly inflating memory use, so it's important to -provide users a way of avoiding these autoreleases. 
Tying this to -precise lifetime semantics is ideal, as for local variables this -requires a very explicit annotation, which allows ARC to trust the -user with good cheer.</p></div> - -</div> <!-- misc.interior --> - -<div id="misc.c-retainable"> -<h1>C retainable pointer types</h1> - -<p>A type is a <span class="term">C retainable pointer type</span> -if it is a pointer to (possibly qualified) <tt>void</tt> or a -pointer to a (possibly qualified) <tt>struct</tt> or <tt>class</tt> -type.</p> - -<div class="rationale"><p>Rationale: ARC does not manage pointers of -CoreFoundation type (or any of the related families of retainable C -pointers which interoperate with Objective-C for retain/release -operation). In fact, ARC does not even know how to distinguish these -types from arbitrary C pointer types. The intent of this concept is -to filter out some obviously non-object types while leaving a hook for -later tightening if a means of exhaustively marking CF types is made -available.</p></div> - -<div id="misc.c-retainable.audit"> -<h1>Auditing of C retainable pointer interfaces</h1> - -<p><span class="revision"><span class="whenRevised">[beginning Apple 4.0, LLVM 3.1]</span></span></p> - -<p>A C function may be marked with the <tt>cf_audited_transfer</tt> -attribute to express that, except as otherwise marked with attributes, -it obeys the parameter (consuming vs. non-consuming) and return -(retained vs. 
non-retained) conventions for a C function of its name, -namely:</p> - -<ul> -<li>A parameter of C retainable pointer type is assumed to not be -consumed unless it is marked with the <tt>cf_consumed</tt> attribute, and</li> -<li>A result of C retainable pointer type is assumed to not be -returned retained unless the function is either -marked <tt>cf_returns_retained</tt> or it follows -the create/copy naming convention and is not -marked <tt>cf_returns_not_retained</tt>.</li> -</ul> - -<p>A function obeys the <span class="term">create/copy</span> naming -convention if its name contains as a substring:</p> -<ul> -<li>either <q>Create</q> or <q>Copy</q> not followed by a lowercase letter, or</li> -<li>either <q>create</q> or <q>copy</q> not followed by a lowercase -letter and not preceded by any letter, whether uppercase or lowercase.</li> -</ul> - -<p>A second attribute, <tt>cf_unknown_transfer</tt>, signifies that a -function's transfer semantics cannot be accurately captured using any -of these annotations. A program is ill-formed if it annotates the -same function with both <tt>cf_audited_transfer</tt> -and <tt>cf_unknown_transfer</tt>.</p> - -<p>A pragma is provided to facilitate the mass annotation of interfaces:</p> - -<pre>#pragma clang arc_cf_code_audited begin -... -#pragma clang arc_cf_code_audited end</pre> - -<p>All C functions declared within the extent of this pragma are -treated as if annotated with the <tt>cf_audited_transfer</tt> -attribute unless they otherwise have the <tt>cf_unknown_transfer</tt> -attribute. The pragma is accepted in all language modes. 
A program -is ill-formed if it attempts to change files, whether by including a -file or ending the current file, within the extent of this pragma.</p> - -<p>It is possible to test for all the features in this section with -<tt>__has_feature(arc_cf_code_audited)</tt>.</p> - -<div class="rationale"><p>Rationale: A significant inconvenience in -ARC programming is the necessity of interacting with APIs based around -C retainable pointers. These features are designed to make it -relatively easy for API authors to quickly review and annotate their -interfaces, in turn improving the fidelity of tools such as the static -analyzer and ARC. The single-file restriction on the pragma is -designed to eliminate the risk of accidentally annotating some other -header's interfaces.</p></div> - -</div> <!-- misc.c-retainable.audit --> - -</div> <!-- misc.c-retainable --> - -</div> <!-- misc --> - -<div id="runtime"> -<h1>Runtime support</h1> - -<p>This section describes the interaction between the ARC runtime and -the code generated by the ARC compiler. This is not part of the ARC -language specification; instead, it is effectively a language-specific -ABI supplement, akin to the <q>Itanium</q> generic ABI for C++.</p> - -<p>Ownership qualification does not alter the storage requirements for -objects, except that it is undefined behavior if a <tt>__weak</tt> -object is inadequately aligned for an object of type <tt>id</tt>. The -other qualifiers may be used on explicitly under-aligned memory.</p> - -<p>The runtime tracks <tt>__weak</tt> objects which hold non-null -values. 
It is undefined behavior to directly modify a <tt>__weak</tt> -object which is being tracked by the runtime except through an -<a href="#runtime.objc_storeWeak"><tt>objc_storeWeak</tt></a>, -<a href="#runtime.objc_destroyWeak"><tt>objc_destroyWeak</tt></a>, -or <a href="#runtime.objc_moveWeak"><tt>objc_moveWeak</tt></a> -call.</p> - -<p>The runtime must provide a number of new entrypoints which the -compiler may emit, which are described in the remainder of this -section.</p> - -<div class="rationale"><p>Rationale: Several of these functions are -semantically equivalent to a message send; we emit calls to C -functions instead because:</p> -<ul> -<li>the machine code to do so is significantly smaller,</li> -<li>it is much easier to recognize the C functions in the ARC optimizer, and</li> -<li>a sufficiently sophisticated runtime may be able to avoid the -message send in common cases.</li> -</ul> - -<p>Several other of these functions are <q>fused</q> operations which -can be described entirely in terms of other operations. We use the -fused operations primarily as a code-size optimization, although in -some cases there is also a real potential for avoiding redundant -operations in the runtime.</p> - -</div> - -<div id="runtime.objc_autorelease"> -<h1><tt>id objc_autorelease(id value);</tt></h1> -<p><i>Precondition:</i> <tt>value</tt> is null or a pointer to a -valid object.</p> -<p>If <tt>value</tt> is null, this call has no effect. 
Otherwise, it -adds the object to the innermost autorelease pool exactly as if the -object had been sent the <tt>autorelease</tt> message.</p> -<p>Always returns <tt>value</tt>.</p> -</div> <!-- runtime.objc_autorelease --> - -<div id="runtime.objc_autoreleasePoolPop"> -<h1><tt>void objc_autoreleasePoolPop(void *pool);</tt></h1> -<p><i>Precondition:</i> <tt>pool</tt> is the result of a previous call to -<a href="#runtime.objc_autoreleasePoolPush"><tt>objc_autoreleasePoolPush</tt></a> -on the current thread, where neither <tt>pool</tt> nor any enclosing -pool have previously been popped.</p> -<p>Releases all the objects added to the given autorelease pool and -any autorelease pools it encloses, then sets the current autorelease -pool to the pool directly enclosing <tt>pool</tt>.</p> -</div> <!-- runtime.objc_autoreleasePoolPop --> - -<div id="runtime.objc_autoreleasePoolPush"> -<h1><tt>void *objc_autoreleasePoolPush(void);</tt></h1> -<p>Creates a new autorelease pool that is enclosed by the current -pool, makes that the current pool, and returns an opaque <q>handle</q> -to it.</p> - -<div class="rationale"><p>Rationale: while the interface is described -as an explicit hierarchy of pools, the rules allow the implementation -to just keep a stack of objects, using the stack depth as the opaque -pool handle.</p></div> - -</div> <!-- runtime.objc_autoreleasePoolPush --> - -<div id="runtime.objc_autoreleaseReturnValue"> -<h1><tt>id objc_autoreleaseReturnValue(id value);</tt></h1> -<p><i>Precondition:</i> <tt>value</tt> is null or a pointer to a -valid object.</p> -<p>If <tt>value</tt> is null, this call has no effect. Otherwise, it -makes a best effort to hand off ownership of a retain count on the -object to a call -to <a href="#runtime.objc_retainAutoreleasedReturnValue"><tt>objc_retainAutoreleasedReturnValue</tt></a> -for the same object in an enclosing call frame. 
If this is not -possible, the object is autoreleased as above.</p> -<p>Always returns <tt>value</tt>.</p> -</div> <!-- runtime.objc_autoreleaseReturnValue --> - -<div id="runtime.objc_copyWeak"> -<h1><tt>void objc_copyWeak(id *dest, id *src);</tt></h1> -<p><i>Precondition:</i> <tt>src</tt> is a valid pointer which either -contains a null pointer or has been registered as a <tt>__weak</tt> -object. <tt>dest</tt> is a valid pointer which has not been -registered as a <tt>__weak</tt> object.</p> -<p><tt>dest</tt> is initialized to be equivalent to <tt>src</tt>, -potentially registering it with the runtime. Equivalent to the -following code:</p> -<pre>void objc_copyWeak(id *dest, id *src) { - objc_release(objc_initWeak(dest, objc_loadWeakRetained(src))); -}</pre> -<p>Must be atomic with respect to calls to <tt>objc_storeWeak</tt> -on <tt>src</tt>.</p> -</div> <!-- runtime.objc_copyWeak --> - -<div id="runtime.objc_destroyWeak"> -<h1><tt>void objc_destroyWeak(id *object);</tt></h1> -<p><i>Precondition:</i> <tt>object</tt> is a valid pointer which -either contains a null pointer or has been registered as -a <tt>__weak</tt> object.</p> -<p><tt>object</tt> is unregistered as a weak object, if it ever was. -The current value of <tt>object</tt> is left unspecified; otherwise, -equivalent to the following code:</p> -<pre>void objc_destroyWeak(id *object) { - objc_storeWeak(object, nil); -}</pre> -<p>Does not need to be atomic with respect to calls -to <tt>objc_storeWeak</tt> on <tt>object</tt>.</p> -</div> <!-- runtime.objc_destroyWeak --> - -<div id="runtime.objc_initWeak"> -<h1><tt>id objc_initWeak(id *object, id value);</tt></h1> -<p><i>Precondition:</i> <tt>object</tt> is a valid pointer which has -not been registered as a <tt>__weak</tt> object. <tt>value</tt> is -null or a pointer to a valid object.</p> -<p>If <tt>value</tt> is a null pointer or the object to which it -points has begun deallocation, <tt>object</tt> is zero-initialized. 
-Otherwise, <tt>object</tt> is registered as a <tt>__weak</tt> object -pointing to <tt>value</tt>. Equivalent to the following code:</p> -<pre>id objc_initWeak(id *object, id value) { - *object = nil; - return objc_storeWeak(object, value); -}</pre> -<p>Returns the value of <tt>object</tt> after the call.</p> -<p>Does not need to be atomic with respect to calls -to <tt>objc_storeWeak</tt> on <tt>object</tt>.</p> -</div> <!-- runtime.objc_initWeak --> - -<div id="runtime.objc_loadWeak"> -<h1><tt>id objc_loadWeak(id *object);</tt></h1> -<p><i>Precondition:</i> <tt>object</tt> is a valid pointer which -either contains a null pointer or has been registered as -a <tt>__weak</tt> object.</p> -<p>If <tt>object</tt> is registered as a <tt>__weak</tt> object, and -the last value stored into <tt>object</tt> has not yet been -deallocated or begun deallocation, retains and autoreleases that value -and returns it. Otherwise returns null. Equivalent to the following -code:</p> -<pre>id objc_loadWeak(id *object) { - return objc_autorelease(objc_loadWeakRetained(object)); -}</pre> -<p>Must be atomic with respect to calls to <tt>objc_storeWeak</tt> -on <tt>object</tt>.</p> -<div class="rationale">Rationale: loading weak references would be -inherently prone to race conditions without the retain.</div> -</div> <!-- runtime.objc_loadWeak --> - -<div id="runtime.objc_loadWeakRetained"> -<h1><tt>id objc_loadWeakRetained(id *object);</tt></h1> -<p><i>Precondition:</i> <tt>object</tt> is a valid pointer which -either contains a null pointer or has been registered as -a <tt>__weak</tt> object.</p> -<p>If <tt>object</tt> is registered as a <tt>__weak</tt> object, and -the last value stored into <tt>object</tt> has not yet been -deallocated or begun deallocation, retains that value and returns it. 
-Otherwise returns null.</p> -<p>Must be atomic with respect to calls to <tt>objc_storeWeak</tt> -on <tt>object</tt>.</p> -</div> <!-- runtime.objc_loadWeakRetained --> - -<div id="runtime.objc_moveWeak"> -<h1><tt>void objc_moveWeak(id *dest, id *src);</tt></h1> -<p><i>Precondition:</i> <tt>src</tt> is a valid pointer which either -contains a null pointer or has been registered as a <tt>__weak</tt> -object. <tt>dest</tt> is a valid pointer which has not been -registered as a <tt>__weak</tt> object.</p> -<p><tt>dest</tt> is initialized to be equivalent to <tt>src</tt>, -potentially registering it with the runtime. <tt>src</tt> may then be -left in its original state, in which case this call is equivalent -to <a href="#runtime.objc_copyWeak"><tt>objc_copyWeak</tt></a>, or it -may be left as null.</p> -<p>Must be atomic with respect to calls to <tt>objc_storeWeak</tt> -on <tt>src</tt>.</p> -</div> <!-- runtime.objc_moveWeak --> - -<div id="runtime.objc_release"> -<h1><tt>void objc_release(id value);</tt></h1> -<p><i>Precondition:</i> <tt>value</tt> is null or a pointer to a -valid object.</p> -<p>If <tt>value</tt> is null, this call has no effect. Otherwise, it -performs a release operation exactly as if the object had been sent -the <tt>release</tt> message.</p> -</div> <!-- runtime.objc_release --> - -<div id="runtime.objc_retain"> -<h1><tt>id objc_retain(id value);</tt></h1> -<p><i>Precondition:</i> <tt>value</tt> is null or a pointer to a -valid object.</p> -<p>If <tt>value</tt> is null, this call has no effect. Otherwise, it -performs a retain operation exactly as if the object had been sent -the <tt>retain</tt> message.</p> -<p>Always returns <tt>value</tt>.</p> -</div> <!-- runtime.objc_retain --> - -<div id="runtime.objc_retainAutorelease"> -<h1><tt>id objc_retainAutorelease(id value);</tt></h1> -<p><i>Precondition:</i> <tt>value</tt> is null or a pointer to a -valid object.</p> -<p>If <tt>value</tt> is null, this call has no effect. 
Otherwise, it -performs a retain operation followed by an autorelease operation. -Equivalent to the following code:</p> -<pre>id objc_retainAutorelease(id value) { - return objc_autorelease(objc_retain(value)); -}</pre> -<p>Always returns <tt>value</tt>.</p> -</div> <!-- runtime.objc_retainAutorelease --> - -<div id="runtime.objc_retainAutoreleaseReturnValue"> -<h1><tt>id objc_retainAutoreleaseReturnValue(id value);</tt></h1> -<p><i>Precondition:</i> <tt>value</tt> is null or a pointer to a -valid object.</p> -<p>If <tt>value</tt> is null, this call has no effect. Otherwise, it -performs a retain operation followed by the operation described in -<a href="#runtime.objc_autoreleaseReturnValue"><tt>objc_autoreleaseReturnValue</tt></a>. -Equivalent to the following code:</p> -<pre>id objc_retainAutoreleaseReturnValue(id value) { - return objc_autoreleaseReturnValue(objc_retain(value)); -}</pre> -<p>Always returns <tt>value</tt>.</p> -</div> <!-- runtime.objc_retainAutoreleaseReturnValue --> - -<div id="runtime.objc_retainAutoreleasedReturnValue"> -<h1><tt>id objc_retainAutoreleasedReturnValue(id value);</tt></h1> -<p><i>Precondition:</i> <tt>value</tt> is null or a pointer to a -valid object.</p> -<p>If <tt>value</tt> is null, this call has no effect. Otherwise, it -attempts to accept a hand off of a retain count from a call to -<a href="#runtime.objc_autoreleaseReturnValue"><tt>objc_autoreleaseReturnValue</tt></a> -on <tt>value</tt> in a recently-called function or something it -calls. If that fails, it performs a retain operation exactly -like <a href="#runtime.objc_retain"><tt>objc_retain</tt></a>.</p> -<p>Always returns <tt>value</tt>.</p> -</div> <!-- runtime.objc_retainAutoreleasedReturnValue --> - -<div id="runtime.objc_retainBlock"> -<h1><tt>id objc_retainBlock(id value);</tt></h1> -<p><i>Precondition:</i> <tt>value</tt> is null or a pointer to a -valid block object.</p> -<p>If <tt>value</tt> is null, this call has no effect. 
Otherwise, if -the block pointed to by <tt>value</tt> is still on the stack, it is -copied to the heap and the address of the copy is returned. Otherwise -a retain operation is performed on the block exactly as if it had been -sent the <tt>retain</tt> message.</p> -</div> <!-- runtime.objc_retainBlock --> - -<div id="runtime.objc_storeStrong"> -<h1><tt>id objc_storeStrong(id *object, id value);</tt></h1> -<p><i>Precondition:</i> <tt>object</tt> is a valid pointer to -a <tt>__strong</tt> object which is adequately aligned for a -pointer. <tt>value</tt> is null or a pointer to a valid object.</p> -<p>Performs the complete sequence for assigning to a <tt>__strong</tt> -object of non-block type. Equivalent to the following code:</p> -<pre>id objc_storeStrong(id *object, id value) { - value = [value retain]; - id oldValue = *object; - *object = value; - [oldValue release]; - return value; -}</pre> -<p>Always returns <tt>value</tt>.</p> -</div> <!-- runtime.objc_storeStrong --> - -<div id="runtime.objc_storeWeak"> -<h1><tt>id objc_storeWeak(id *object, id value);</tt></h1> -<p><i>Precondition:</i> <tt>object</tt> is a valid pointer which -either contains a null pointer or has been registered as -a <tt>__weak</tt> object. <tt>value</tt> is null or a pointer to a -valid object.</p> -<p>If <tt>value</tt> is a null pointer or the object to which it -points has begun deallocation, <tt>object</tt> is assigned null -and unregistered as a <tt>__weak</tt> object. Otherwise, -<tt>object</tt> is registered as a <tt>__weak</tt> object or has its -registration updated to point to <tt>value</tt>.</p> -<p>Returns the value of <tt>object</tt> after the call.</p> -</div> <!-- runtime.objc_storeWeak --> - -</div> <!-- runtime --> -</div> <!-- root --> -</body> -</html> diff --git a/docs/AutomaticReferenceCounting.rst b/docs/AutomaticReferenceCounting.rst new file mode 100644 index 0000000..1457b60 --- /dev/null +++ b/docs/AutomaticReferenceCounting.rst @@ -0,0 +1,2283 @@ +.. 
FIXME: move to the stylesheet or Sphinx plugin + +.. raw:: html + + <style> + .arc-term { font-style: italic; font-weight: bold; } + .revision { font-style: italic; } + .when-revised { font-weight: bold; font-style: normal; } + + /* + * Automatic numbering is described in this article: + * http://dev.opera.com/articles/view/automatic-numbering-with-css-counters/ + */ + /* + * Automatic numbering for the TOC. + * This is wrong from the semantics point of view, since it is an ordered + * list, but uses "ul" tag. + */ + div#contents.contents.local ul { + counter-reset: toc-section; + list-style-type: none; + } + div#contents.contents.local ul li { + counter-increment: toc-section; + background: none; // Remove bullets + } + div#contents.contents.local ul li a.reference:before { + content: counters(toc-section, ".") " "; + } + + /* Automatic numbering for the body. */ + body { + counter-reset: section subsection subsubsection; + } + .section h2 { + counter-reset: subsection subsubsection; + counter-increment: section; + } + .section h2 a.toc-backref:before { + content: counter(section) " "; + } + .section h3 { + counter-reset: subsubsection; + counter-increment: subsection; + } + .section h3 a.toc-backref:before { + content: counter(section) "." counter(subsection) " "; + } + .section h4 { + counter-increment: subsubsection; + } + .section h4 a.toc-backref:before { + content: counter(section) "." counter(subsection) "." counter(subsubsection) " "; + } + </style> + +.. role:: arc-term +.. role:: revision +.. role:: when-revised + +============================================== +Objective-C Automatic Reference Counting (ARC) +============================================== + +.. contents:: + :local: + +.. _arc.meta: + +About this document +=================== + +.. _arc.meta.purpose: + +Purpose +------- + +The first and primary purpose of this document is to serve as a complete +technical specification of Automatic Reference Counting. 
Given a core +Objective-C compiler and runtime, it should be possible to write a compiler and +runtime which implements these new semantics. + +The secondary purpose is to act as a rationale for why ARC was designed in this +way. This should remain tightly focused on the technical design and should not +stray into marketing speculation. + +.. _arc.meta.background: + +Background +---------- + +This document assumes a basic familiarity with C. + +:arc-term:`Blocks` are a C language extension for creating anonymous functions. +Users interact with and transfer block objects using :arc-term:`block +pointers`, which are represented like a normal pointer. A block may capture +values from local variables; when this occurs, memory must be dynamically +allocated. The initial allocation is done on the stack, but the runtime +provides a ``Block_copy`` function which, given a block pointer, either copies +the underlying block object to the heap, setting its reference count to 1 and +returning the new block pointer, or (if the block object is already on the +heap) increases its reference count by 1. The paired function is +``Block_release``, which decreases the reference count by 1 and destroys the +object if the count reaches zero and is on the heap. + +Objective-C is a set of language extensions, significant enough to be +considered a different language. It is a strict superset of C. The extensions +can also be imposed on C++, producing a language called Objective-C++. The +primary feature is a single-inheritance object system; we briefly describe the +modern dialect. + +Objective-C defines a new type kind, collectively called the :arc-term:`object +pointer types`. This kind has two notable builtin members, ``id`` and +``Class``; ``id`` is the final supertype of all object pointers. The validity +of conversions between object pointer types is not checked at runtime. 
Users +may define :arc-term:`classes`; each class is a type, and the pointer to that +type is an object pointer type. A class may have a superclass; its pointer +type is a subtype of its superclass's pointer type. A class has a set of +:arc-term:`ivars`, fields which appear on all instances of that class. For +every class *T* there's an associated metaclass; it has no fields, its +superclass is the metaclass of *T*'s superclass, and its metaclass is a global +class. Every class has a global object whose class is the class's metaclass; +metaclasses have no associated type, so pointers to this object have type +``Class``. + +A class declaration (``@interface``) declares a set of :arc-term:`methods`. A +method has a return type, a list of argument types, and a :arc-term:`selector`: +a name like ``foo:bar:baz:``, where the number of colons corresponds to the +number of formal arguments. A method may be an instance method, in which case +it can be invoked on objects of the class, or a class method, in which case it +can be invoked on objects of the metaclass. A method may be invoked by +providing an object (called the :arc-term:`receiver`) and a list of formal +arguments interspersed with the selector, like so: + +.. code-block:: objc + + [receiver foo: fooArg bar: barArg baz: bazArg] + +This looks in the dynamic class of the receiver for a method with this name, +then in that class's superclass, etc., until it finds something it can execute. +The receiver "expression" may also be the name of a class, in which case the +actual receiver is the class object for that class, or (within method +definitions) it may be ``super``, in which case the lookup algorithm starts +with the static superclass instead of the dynamic class. 
The actual methods +dynamically found in a class are not those declared in the ``@interface``, but +those defined in a separate ``@implementation`` declaration; however, when +compiling a call, typechecking is done based on the methods declared in the +``@interface``. + +Method declarations may also be grouped into :arc-term:`protocols`, which are not +inherently associated with any class, but which classes may claim to follow. +Object pointer types may be qualified with additional protocols that the object +is known to support. + +:arc-term:`Class extensions` are collections of ivars and methods, designed to +allow a class's ``@interface`` to be split across multiple files; however, +there is still a primary implementation file which must see the +``@interface``\ s of all class extensions. :arc-term:`Categories` allow +methods (but not ivars) to be declared *post hoc* on an arbitrary class; the +methods in the category's ``@implementation`` will be dynamically added to that +class's method tables when the category is loaded at runtime, replacing those +methods in case of a collision. + +In the standard environment, objects are allocated on the heap, and their +lifetime is manually managed using a reference count. This is done using two +instance methods which all classes are expected to implement: ``retain`` +increases the object's reference count by 1, whereas ``release`` decreases it +by 1 and calls the instance method ``dealloc`` if the count reaches 0. To +simplify certain operations, there is also an :arc-term:`autorelease pool`, a +thread-local list of objects to call ``release`` on later; an object can be +added to this pool by calling ``autorelease`` on it. + +Block pointers may be converted to type ``id``; block objects are laid out in a +way that makes them compatible with Objective-C objects.
There is a builtin +class that all block objects are considered to be objects of; this class +implements ``retain`` by adjusting the reference count, not by calling +``Block_copy``. + +.. _arc.meta.evolution: + +Evolution +--------- + +ARC is under continual evolution, and this document must be updated as the +language progresses. + +If a change increases the expressiveness of the language, for example by +lifting a restriction or by adding new syntax, the change will be annotated +with a revision marker, like so: + + ARC applies to Objective-C pointer types, block pointer types, and + :when-revised:`[beginning Apple 8.0, LLVM 3.8]` :revision:`BPTRs declared + within` ``extern "BCPL"`` blocks. + +For now, it is sensible to version this document by the releases of its sole +implementation (and its host project), clang. "LLVM X.Y" refers to an +open-source release of clang from the LLVM project. "Apple X.Y" refers to an +Apple-provided release of the Apple LLVM Compiler. Other organizations that +prepare their own, separately-versioned clang releases and wish to maintain +similar information in this document should send requests to cfe-dev. + +If a change decreases the expressiveness of the language, for example by +imposing a new restriction, this should be taken as an oversight in the +original specification and something to be avoided in all versions. Such +changes are generally to be avoided. + +.. _arc.general: + +General +======= + +Automatic Reference Counting implements automatic memory management for +Objective-C objects and blocks, freeing the programmer from the need to +explicitly insert retains and releases. It does not provide a cycle collector; +users must explicitly manage the lifetime of their objects, breaking cycles +manually or with weak or unsafe references. + +ARC may be explicitly enabled with the compiler flag ``-fobjc-arc``. It may +also be explicitly disabled with the compiler flag ``-fno-objc-arc``. 
The last +of these two flags appearing on the compile line "wins". + +If ARC is enabled, ``__has_feature(objc_arc)`` will expand to 1 in the +preprocessor. For more information about ``__has_feature``, see the +:ref:`language extensions <langext-__has_feature-__has_extension>` document. + +.. _arc.objects: + +Retainable object pointers +========================== + +This section describes retainable object pointers, their basic operations, and +the restrictions imposed on their use under ARC. Note in particular that it +covers the rules for pointer *values* (patterns of bits indicating the location +of a pointed-to object), not pointer *objects* (locations in memory which store +pointer values). The rules for objects are covered in the next section. + +A :arc-term:`retainable object pointer` (or "retainable pointer") is a value of +a :arc-term:`retainable object pointer type` ("retainable type"). There are +three kinds of retainable object pointer types: + +* block pointers (formed by applying the caret (``^``) declarator sigil to a + function type) +* Objective-C object pointers (``id``, ``Class``, ``NSFoo*``, etc.) +* typedefs marked with ``__attribute__((NSObject))`` + +Other pointer types, such as ``int*`` and ``CFStringRef``, are not subject to +ARC's semantics and restrictions. + +.. admonition:: Rationale + + We are not at liberty to require all code to be recompiled with ARC; + therefore, ARC must interoperate with Objective-C code which manages retains + and releases manually. In general, there are three requirements in order for + a compiler-supported reference-count system to provide reliable + interoperation: + + * The type system must reliably identify which objects are to be managed. An + ``int*`` might be a pointer to a ``malloc``'ed array, or it might be an + interior pointer to such an array, or it might point to some field or local + variable. In contrast, values of the retainable object pointer types are + never interior. 
+ + * The type system must reliably indicate how to manage objects of a type. + This usually means that the type must imply a procedure for incrementing + and decrementing retain counts. Supporting single-ownership objects + requires a lot more explicit mediation in the language. + + * There must be reliable conventions for whether and when "ownership" is + passed between caller and callee, for both arguments and return values. + Objective-C methods follow such a convention very reliably, at least for + system libraries on Mac OS X, and functions always pass objects at +0. The + C-based APIs for Core Foundation objects, on the other hand, have much more + varied transfer semantics. + +The use of ``__attribute__((NSObject))`` typedefs is not recommended. If it's +absolutely necessary to use this attribute, be very explicit about using the +typedef, and do not assume that it will be preserved by language features like +``__typeof`` and C++ template argument substitution. + +.. admonition:: Rationale + + Any compiler operation which incidentally strips type "sugar" from a type + will yield a type without the attribute, which may result in unexpected + behavior. + +.. _arc.objects.retains: + +Retain count semantics +---------------------- + +A retainable object pointer is either a :arc-term:`null pointer` or a pointer +to a valid object. Furthermore, if it has block pointer type and is not +``null`` then it must actually be a pointer to a block object, and if it has +``Class`` type (possibly protocol-qualified) then it must actually be a pointer +to a class object. Otherwise ARC does not enforce the Objective-C type system +as long as the implementing methods follow the signature of the static type. +It is undefined behavior if ARC is exposed to an invalid pointer. + +For ARC's purposes, a valid object is one with "well-behaved" retaining +operations. 
Specifically, the object must be laid out such that the +Objective-C message send machinery can successfully send it the following +messages: + +* ``retain``, taking no arguments and returning a pointer to the object. +* ``release``, taking no arguments and returning ``void``. +* ``autorelease``, taking no arguments and returning a pointer to the object. + +The behavior of these methods is constrained in the following ways. The term +:arc-term:`high-level semantics` is an intentionally vague term; the intent is +that programmers must implement these methods in a way such that the compiler, +modifying code in ways it deems safe according to these constraints, will not +violate their requirements. For example, if the user puts logging statements +in ``retain``, they should not be surprised if those statements are executed +more or less often depending on optimization settings. These constraints are +not exhaustive of the optimization opportunities: values held in local +variables are subject to additional restrictions, described later in this +document. + +It is undefined behavior if a computation history featuring a send of +``retain`` followed by a send of ``release`` to the same object, with no +intervening ``release`` on that object, is not equivalent under the high-level +semantics to a computation history in which these sends are removed. Note that +this implies that these methods may not raise exceptions. + +It is undefined behavior if a computation history features any use whatsoever +of an object following the completion of a send of ``release`` that is not +preceded by a send of ``retain`` to the same object. + +The behavior of ``autorelease`` must be equivalent to sending ``release`` when +one of the autorelease pools currently in scope is popped. It may not throw an +exception. + +When the semantics call for performing one of these operations on a retainable +object pointer, if that pointer is ``null`` then the effect is a no-op. 
+ +All of the semantics described in this document are subject to additional +:ref:`optimization rules <arc.optimization>` which permit the removal or +optimization of operations based on local knowledge of data flow. The +semantics describe the high-level behaviors that the compiler implements, not +an exact sequence of operations that a program will be compiled into. + +.. _arc.objects.operands: + +Retainable object pointers as operands and arguments +---------------------------------------------------- + +In general, ARC does not perform retain or release operations when simply using +a retainable object pointer as an operand within an expression. This includes: + +* loading a retainable pointer from an object with non-weak :ref:`ownership + <arc.ownership>`, +* passing a retainable pointer as an argument to a function or method, and +* receiving a retainable pointer as the result of a function or method call. + +.. admonition:: Rationale + + While this might seem uncontroversial, it is actually unsafe when multiple + expressions are evaluated in "parallel", as with binary operators and calls, + because (for example) one expression might load from an object while another + writes to it. However, C and C++ already call this undefined behavior + because the evaluations are unsequenced, and ARC simply exploits that here to + avoid needing to retain arguments across a large number of calls. + +The remainder of this section describes exceptions to these rules, how those +exceptions are detected, and what those exceptions imply semantically. + +.. _arc.objects.operands.consumed: + +Consumed parameters +^^^^^^^^^^^^^^^^^^^ + +A function or method parameter of retainable object pointer type may be marked +as :arc-term:`consumed`, signifying that the callee expects to take ownership +of a +1 retain count. This is done by adding the ``ns_consumed`` attribute to +the parameter declaration, like so: + +.. 
code-block:: objc + + void foo(__attribute((ns_consumed)) id x); + - (void) foo: (id) __attribute((ns_consumed)) x; + +This attribute is part of the type of the function or method, not the type of +the parameter. It controls only how the argument is passed and received. + +When passing such an argument, ARC retains the argument prior to making the +call. + +When receiving such an argument, ARC releases the argument at the end of the +function, subject to the usual optimizations for local values. + +.. admonition:: Rationale + + This formalizes direct transfers of ownership from a caller to a callee. The + most common scenario here is passing the ``self`` parameter to ``init``, but + it is useful to generalize. Typically, local optimization will remove any + extra retains and releases: on the caller side the retain will be merged with + a +1 source, and on the callee side the release will be rolled into the + initialization of the parameter. + +The implicit ``self`` parameter of a method may be marked as consumed by adding +``__attribute__((ns_consumes_self))`` to the method declaration. Methods in +the ``init`` :ref:`family <arc.method-families>` are treated as if they were +implicitly marked with this attribute. + +It is undefined behavior if an Objective-C message send to a method with +``ns_consumed`` parameters (other than self) is made with a null receiver. It +is undefined behavior if the method to which an Objective-C message send +statically resolves has a different set of ``ns_consumed`` parameters than +the method it dynamically resolves to. It is undefined behavior if a block or +function call is made through a static type with a different set of +``ns_consumed`` parameters than the implementation of the called block or +function. + +.. admonition:: Rationale + + Consumed parameters with null receiver are a guaranteed leak. Mismatches + with consumed parameters will cause over-retains or over-releases, depending + on the direction.
The rule about function calls is really just an + application of the existing C/C++ rule about calling functions through an + incompatible function type, but it's useful to state it explicitly. + +.. _arc.object.operands.retained-return-values: + +Retained return values +^^^^^^^^^^^^^^^^^^^^^^ + +A function or method which returns a retainable object pointer type may be +marked as returning a retained value, signifying that the caller expects to take +ownership of a +1 retain count. This is done by adding the +``ns_returns_retained`` attribute to the function or method declaration, like +so: + +.. code-block:: objc + + id foo(void) __attribute((ns_returns_retained)); + - (id) foo __attribute((ns_returns_retained)); + +This attribute is part of the type of the function or method. + +When returning from such a function or method, ARC retains the value at the +point of evaluation of the return statement, before leaving all local scopes. + +When receiving a return result from such a function or method, ARC releases the +value at the end of the full-expression it is contained within, subject to the +usual optimizations for local values. + +.. admonition:: Rationale + + This formalizes direct transfers of ownership from a callee to a caller. The + most common scenario this models is the retained return from ``init``, + ``alloc``, ``new``, and ``copy`` methods, but there are other cases in the + frameworks. After optimization there are typically no extra retains and + releases required. + +Methods in the ``alloc``, ``copy``, ``init``, ``mutableCopy``, and ``new`` +:ref:`families <arc.method-families>` are implicitly marked +``__attribute__((ns_returns_retained))``. This may be suppressed by explicitly +marking the method ``__attribute__((ns_returns_not_retained))``. + +It is undefined behavior if the method to which an Objective-C message send +statically resolves has different retain semantics on its result from the +method it dynamically resolves to. 
It is undefined behavior if a block or +function call is made through a static type with different retain semantics on +its result from the implementation of the called block or function. + +.. admonition:: Rationale + + Mismatches with returned results will cause over-retains or over-releases, + depending on the direction. Again, the rule about function calls is really + just an application of the existing C/C++ rule about calling functions + through an incompatible function type. + +.. _arc.objects.operands.unretained-returns: + +Unretained return values +^^^^^^^^^^^^^^^^^^^^^^^^ + +A method or function which returns a retainable object type but does not return +a retained value must ensure that the object is still valid across the return +boundary. + +When returning from such a function or method, ARC retains the value at the +point of evaluation of the return statement, then leaves all local scopes, and +then balances out the retain while ensuring that the value lives across the +call boundary. In the worst case, this may involve an ``autorelease``, but +callers must not assume that the value is actually in the autorelease pool. + +ARC performs no extra mandatory work on the caller side, although it may elect +to do something to shorten the lifetime of the returned value. + +.. admonition:: Rationale + + It is common in non-ARC code to not return an autoreleased value; therefore + the convention does not force either path. It is convenient to not be + required to do unnecessary retains and autoreleases; this permits + optimizations such as eliding retain/autoreleases when it can be shown that + the original pointer will still be valid at the point of return. + +A method or function may be marked with +``__attribute__((ns_returns_autoreleased))`` to indicate that it returns a +pointer which is guaranteed to be valid at least as long as the innermost +autorelease pool. 
There are no additional semantics enforced in the definition +of such a method; it merely enables optimizations in callers. + +.. _arc.objects.operands.casts: + +Bridged casts +^^^^^^^^^^^^^ + +A :arc-term:`bridged cast` is a C-style cast annotated with one of three +keywords: + +* ``(__bridge T) op`` casts the operand to the destination type ``T``. If + ``T`` is a retainable object pointer type, then ``op`` must have a + non-retainable pointer type. If ``T`` is a non-retainable pointer type, + then ``op`` must have a retainable object pointer type. Otherwise the cast + is ill-formed. There is no transfer of ownership, and ARC inserts no retain + operations. +* ``(__bridge_retained T) op`` casts the operand, which must have retainable + object pointer type, to the destination type, which must be a non-retainable + pointer type. ARC retains the value, subject to the usual optimizations on + local values, and the recipient is responsible for balancing that +1. +* ``(__bridge_transfer T) op`` casts the operand, which must have + non-retainable pointer type, to the destination type, which must be a + retainable object pointer type. ARC will release the value at the end of + the enclosing full-expression, subject to the usual optimizations on local + values. + +These casts are required in order to transfer objects in and out of ARC +control; see the rationale in the section on :ref:`conversion of retainable +object pointers <arc.objects.restrictions.conversion>`. + +Using a ``__bridge_retained`` or ``__bridge_transfer`` cast purely to convince +ARC to emit an unbalanced retain or release, respectively, is poor form. + +.. _arc.objects.restrictions: + +Restrictions +------------ + +.. 
_arc.objects.restrictions.conversion: + +Conversion of retainable object pointers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In general, a program which attempts to implicitly or explicitly convert a +value of retainable object pointer type to any non-retainable type, or +vice-versa, is ill-formed. For example, an Objective-C object pointer shall +not be converted to ``void*``. As an exception, cast to ``intptr_t`` is +allowed because such casts are not transferring ownership. The :ref:`bridged +casts <arc.objects.operands.casts>` may be used to perform these conversions +where necessary. + +.. admonition:: Rationale + + We cannot ensure the correct management of the lifetime of objects if they + may be freely passed around as unmanaged types. The bridged casts are + provided so that the programmer may explicitly describe whether the cast + transfers control into or out of ARC. + +However, the following exceptions apply. + +.. _arc.objects.restrictions.conversion.with.known.semantics: + +Conversion to retainable object pointer type of expressions with known semantics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:when-revised:`[beginning Apple 4.0, LLVM 3.1]` +:revision:`These exceptions have been greatly expanded; they previously applied +only to a much-reduced subset which is difficult to categorize but which +included null pointers, message sends (under the given rules), and the various +global constants.` + +An unbridged conversion to a retainable object pointer type from a type other +than a retainable object pointer type is ill-formed, as discussed above, unless +the operand of the cast has a syntactic form which is known retained, known +unretained, or known retain-agnostic. + +An expression is :arc-term:`known retain-agnostic` if it is: + +* an Objective-C string literal, +* a load from a ``const`` system global variable of :ref:`C retainable pointer + type <arc.misc.c-retainable>`, or +* a null pointer constant. 
+ +An expression is :arc-term:`known unretained` if it is an rvalue of :ref:`C +retainable pointer type <arc.misc.c-retainable>` and it is: + +* a direct call to a function, and either that function has the + ``cf_returns_not_retained`` attribute or it is an :ref:`audited + <arc.misc.c-retainable.audit>` function that does not have the + ``cf_returns_retained`` attribute and does not follow the create/copy naming + convention, +* a message send, and the declared method either has the + ``cf_returns_not_retained`` attribute or it has neither the + ``cf_returns_retained`` attribute nor a :ref:`selector family + <arc.method-families>` that implies a retained result. + +An expression is :arc-term:`known retained` if it is an rvalue of :ref:`C +retainable pointer type <arc.misc.c-retainable>` and it is: + +* a message send, and the declared method either has the + ``cf_returns_retained`` attribute, or it does not have the + ``cf_returns_not_retained`` attribute but it does have a :ref:`selector + family <arc.method-families>` that implies a retained result. + +Furthermore: + +* a comma expression is classified according to its right-hand side, +* a statement expression is classified according to its result expression, if + it has one, +* an lvalue-to-rvalue conversion applied to an Objective-C property lvalue is + classified according to the underlying message send, and +* a conditional operator is classified according to its second and third + operands, if they agree in classification, or else the other if one is known + retain-agnostic. + +If the cast operand is known retained, the conversion is treated as a +``__bridge_transfer`` cast. If the cast operand is known unretained or known +retain-agnostic, the conversion is treated as a ``__bridge`` cast. + +.. admonition:: Rationale + + Bridging casts are annoying. 
Absent the ability to completely automate the + management of CF objects, however, we are left with relatively poor attempts + to reduce the need for a glut of explicit bridges. Hence these rules. + + We've so far consciously refrained from implicitly turning retained CF + results from function calls into ``__bridge_transfer`` casts. The worry is + that some code patterns --- for example, creating a CF value, assigning it + to an ObjC-typed local, and then calling ``CFRelease`` when done --- are a + bit too likely to be accidentally accepted, leading to mysterious behavior. + +.. _arc.objects.restrictions.conversion-exception-contextual: + +Conversion from retainable object pointer type in certain contexts +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:when-revised:`[beginning Apple 4.0, LLVM 3.1]` + +If an expression of retainable object pointer type is explicitly cast to a +:ref:`C retainable pointer type <arc.misc.c-retainable>`, the program is +ill-formed as discussed above unless the result is immediately used: + +* to initialize a parameter in an Objective-C message send where the parameter + is not marked with the ``cf_consumed`` attribute, or +* to initialize a parameter in a direct call to an + :ref:`audited <arc.misc.c-retainable.audit>` function where the parameter is + not marked with the ``cf_consumed`` attribute. + +.. admonition:: Rationale + + Consumed parameters are left out because ARC would naturally balance them + with a retain, which was judged too treacherous. This is in part because + several of the most common consuming functions are in the ``Release`` family, + and it would be quite unfortunate for explicit releases to be silently + balanced out in this way. + +.. _arc.ownership: + +Ownership qualification +======================= + +This section describes the behavior of *objects* of retainable object pointer +type; that is, locations in memory which store retainable object pointers. 
+ +A type is a :arc-term:`retainable object owner type` if it is a retainable +object pointer type or an array type whose element type is a retainable object +owner type. + +An :arc-term:`ownership qualifier` is a type qualifier which applies only to +retainable object owner types. An array type is ownership-qualified according +to its element type, and adding an ownership qualifier to an array type so +qualifies its element type. + +A program is ill-formed if it attempts to apply an ownership qualifier to a +type which is already ownership-qualified, even if it is the same qualifier. +There is a single exception to this rule: an ownership qualifier may be applied +to a substituted template type parameter, which overrides the ownership +qualifier provided by the template argument. + +When forming a function type, the result type is adjusted so that any +top-level ownership qualifier is deleted. + +Except as described under the :ref:`inference rules <arc.ownership.inference>`, +a program is ill-formed if it attempts to form a pointer or reference type to a +retainable object owner type which lacks an ownership qualifier. + +.. admonition:: Rationale + + These rules, together with the inference rules, ensure that all objects and + lvalues of retainable object pointer type have an ownership qualifier. The + ability to override an ownership qualifier during template substitution is + required to counteract the :ref:`inference of __strong for template type + arguments <arc.ownership.inference.template.arguments>`. Ownership qualifiers + on return types are dropped because they serve no purpose there except to + cause spurious problems with overloading and templates. + +There are four ownership qualifiers: + +* ``__autoreleasing`` +* ``__strong`` +* ``__unsafe_unretained`` +* ``__weak`` + +A type is :arc-term:`nontrivially ownership-qualified` if it is qualified with +``__autoreleasing``, ``__strong``, or ``__weak``. + +.. 
_arc.ownership.spelling: + +Spelling +-------- + +The names of the ownership qualifiers are reserved for the implementation. A +program may not assume that they are or are not implemented with macros, or +what those macros expand to. + +An ownership qualifier may be written anywhere that any other type qualifier +may be written. + +If an ownership qualifier appears in the *declaration-specifiers*, the +following rules apply: + +* if the type specifier is a retainable object owner type, the qualifier + initially applies to that type; + +* otherwise, if the outermost non-array declarator is a pointer + or block pointer declarator, the qualifier initially applies to + that type; + +* otherwise the program is ill-formed. + +* If the qualifier is so applied at a position in the declaration + where the next-innermost declarator is a function declarator, and + there is a block declarator within that function declarator, then + the qualifier applies instead to that block declarator and this rule + is considered afresh beginning from the new position. + +If an ownership qualifier appears on the declarator name, or on the declared +object, it is applied to the innermost pointer or block-pointer type. + +If an ownership qualifier appears anywhere else in a declarator, it applies to +the type there. + +.. admonition:: Rationale + + Ownership qualifiers are like ``const`` and ``volatile`` in the sense + that they may sensibly apply at multiple distinct positions within a + declarator. However, unlike those qualifiers, there are many + situations where they are not meaningful, and so we make an effort + to "move" the qualifier to a place where it will be meaningful. The + general goal is to allow the programmer to write, say, ``__strong`` + before the entire declaration and have it apply in the leftmost + sensible place. + +.. _arc.ownership.spelling.property: + +Property declarations +^^^^^^^^^^^^^^^^^^^^^ + +A property of retainable object pointer type may have ownership.
If the +property's type is ownership-qualified, then the property has that ownership. +If the property has one of the following modifiers, then the property has the +corresponding ownership. A property is ill-formed if it has conflicting +sources of ownership, or if it has redundant ownership modifiers, or if it has +``__autoreleasing`` ownership. + +* ``assign`` implies ``__unsafe_unretained`` ownership. +* ``copy`` implies ``__strong`` ownership, as well as the usual behavior of + copy semantics on the setter. +* ``retain`` implies ``__strong`` ownership. +* ``strong`` implies ``__strong`` ownership. +* ``unsafe_unretained`` implies ``__unsafe_unretained`` ownership. +* ``weak`` implies ``__weak`` ownership. + +With the exception of ``weak``, these modifiers are available in non-ARC +modes. + +A property's specified ownership is preserved in its metadata, but otherwise +the meaning is purely conventional unless the property is synthesized. If a +property is synthesized, then the :arc-term:`associated instance variable` is +the instance variable which is named, possibly implicitly, by the +``@synthesize`` declaration. If the associated instance variable already +exists, then its ownership qualification must equal the ownership of the +property; otherwise, the instance variable is created with that ownership +qualification. + +A property of retainable object pointer type which is synthesized without a +source of ownership has the ownership of its associated instance variable, if it +already exists; otherwise, :when-revised:`[beginning Apple 3.1, LLVM 3.1]` +:revision:`its ownership is implicitly` ``strong``. Prior to this revision, it +was ill-formed to synthesize such a property. + +.. admonition:: Rationale + + Using ``strong`` by default is safe and consistent with the generic ARC rule + about :ref:`inferring ownership <arc.ownership.inference.variables>`. 
It is, + unfortunately, inconsistent with the non-ARC rule which states that such + properties are implicitly ``assign``. However, that rule is clearly + untenable in ARC, since it leads to default-unsafe code. The main merit to + banning the properties is to avoid confusion with non-ARC practice, which did + not ultimately strike us as sufficient to justify requiring extra syntax and + (more importantly) forcing novices to understand ownership rules just to + declare a property when the default is so reasonable. Changing the rule away + from non-ARC practice was acceptable because we had conservatively banned the + synthesis in order to give ourselves exactly this leeway. + +Applying ``__attribute__((NSObject))`` to a property not of retainable object +pointer type has the same behavior it does outside of ARC: it requires the +property type to be some sort of pointer and permits the use of modifiers other +than ``assign``. These modifiers only affect the synthesized getter and +setter; direct accesses to the ivar (even if synthesized) still have primitive +semantics, and the value in the ivar will not be automatically released during +deallocation. + +.. _arc.ownership.semantics: + +Semantics +--------- + +There are five :arc-term:`managed operations` which may be performed on an +object of retainable object pointer type. Each qualifier specifies different +semantics for each of these operations. It is still undefined behavior to +access an object outside of its lifetime. + +A load or store with "primitive semantics" has the same semantics as the +respective operation would have on a ``void*`` lvalue with the same alignment +and non-ownership qualification. + +:arc-term:`Reading` occurs when performing an lvalue-to-rvalue conversion on an +object lvalue. + +* For ``__weak`` objects, the current pointee is retained and then released at + the end of the current full-expression.
This must execute atomically with + respect to assignments and to the final release of the pointee. +* For all other objects, the lvalue is loaded with primitive semantics. + +:arc-term:`Assignment` occurs when evaluating an assignment operator. The +semantics vary based on the qualification: + +* For ``__strong`` objects, the new pointee is first retained; second, the + lvalue is loaded with primitive semantics; third, the new pointee is stored + into the lvalue with primitive semantics; and finally, the old pointee is + released. This is not performed atomically; external synchronization must be + used to make this safe in the face of concurrent loads and stores. +* For ``__weak`` objects, the lvalue is updated to point to the new pointee, + unless the new pointee is an object currently undergoing deallocation, in + which case the lvalue is updated to a null pointer. This must execute + atomically with respect to other assignments to the object, to reads from the + object, and to the final release of the new pointee. +* For ``__unsafe_unretained`` objects, the new pointee is stored into the + lvalue using primitive semantics. +* For ``__autoreleasing`` objects, the new pointee is retained, autoreleased, + and stored into the lvalue using primitive semantics. + +:arc-term:`Initialization` occurs when an object's lifetime begins, which +depends on its storage duration. Initialization proceeds in two stages: + +#. First, a null pointer is stored into the lvalue using primitive semantics. + This step is skipped if the object is ``__unsafe_unretained``. +#. Second, if the object has an initializer, that expression is evaluated and + then assigned into the object using the usual assignment semantics. + +:arc-term:`Destruction` occurs when an object's lifetime ends. In all cases it +is semantically equivalent to assigning a null pointer to the object, with the +proviso that of course the object cannot be legally read after the object's +lifetime ends. 
+ +:arc-term:`Moving` occurs in specific situations where an lvalue is "moved +from", meaning that its current pointee will be used but the object may be left +in a different (but still valid) state. This arises with ``__block`` variables +and rvalue references in C++. For ``__strong`` lvalues, moving is equivalent +to loading the lvalue with primitive semantics, writing a null pointer to it +with primitive semantics, and then releasing the result of the load at the end +of the current full-expression. For all other lvalues, moving is equivalent to +reading the object. + +.. _arc.ownership.restrictions: + +Restrictions +------------ + +.. _arc.ownership.restrictions.weak: + +Weak-unavailable types +^^^^^^^^^^^^^^^^^^^^^^ + +It is explicitly permitted for Objective-C classes to not support ``__weak`` +references. It is undefined behavior to perform an operation with weak +assignment semantics with a pointer to an Objective-C object whose class does +not support ``__weak`` references. + +.. admonition:: Rationale + + Historically, it has been possible for a class to provide its own + reference-count implementation by overriding ``retain``, ``release``, etc. + However, weak references to an object require coordination with its class's + reference-count implementation because, among other things, weak loads and + stores must be atomic with respect to the final release. Therefore, existing + custom reference-count implementations will generally not support weak + references without additional effort. This is unavoidable without breaking + binary compatibility. + +A class may indicate that it does not support weak references by providing the +``objc_arc_weak_unavailable`` attribute on the class's interface declaration. A +retainable object pointer type is **weak-unavailable** if it +is a pointer to an (optionally protocol-qualified) Objective-C class ``T`` where +``T`` or one of its superclasses has the ``objc_arc_weak_unavailable`` +attribute. 
A program is ill-formed if it applies the ``__weak`` ownership +qualifier to a weak-unavailable type or if the value operand of a weak +assignment operation has a weak-unavailable type. + +.. _arc.ownership.restrictions.autoreleasing: + +Storage duration of ``__autoreleasing`` objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A program is ill-formed if it declares an ``__autoreleasing`` object of +non-automatic storage duration. A program is ill-formed if it captures an +``__autoreleasing`` object in a block or, unless by reference, in a C++11 +lambda. + +.. admonition:: Rationale + + Autorelease pools are tied to the current thread and scope by their nature. + While it is possible to have temporary objects whose instance variables are + filled with autoreleased objects, there is no way that ARC can provide any + sort of safety guarantee there. + +It is undefined behavior if a non-null pointer is assigned to an +``__autoreleasing`` object while an autorelease pool is in scope and then that +object is read after the autorelease pool's scope is left. + +.. _arc.ownership.restrictions.conversion.indirect: + +Conversion of pointers to ownership-qualified types +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A program is ill-formed if an expression of type ``T*`` is converted, +explicitly or implicitly, to the type ``U*``, where ``T`` and ``U`` have +different ownership qualification, unless: + +* ``T`` is qualified with ``__strong``, ``__autoreleasing``, or + ``__unsafe_unretained``, and ``U`` is qualified with both ``const`` and + ``__unsafe_unretained``; or +* either ``T`` or ``U`` is ``cv void``, where ``cv`` is an optional sequence + of non-ownership qualifiers; or +* the conversion is requested with a ``reinterpret_cast`` in Objective-C++; or +* the conversion is a well-formed :ref:`pass-by-writeback + <arc.ownership.restrictions.pass_by_writeback>`. + +The analogous rule applies to ``T&`` and ``U&`` in Objective-C++. + +.. 
admonition:: Rationale + + These rules provide a reasonable level of type-safety for indirect pointers, + as long as the underlying memory is not deallocated. The conversion to + ``const __unsafe_unretained`` is permitted because the semantics of reads are + equivalent across all these ownership semantics, and that's a very useful and + common pattern. The interconversion with ``void*`` is useful for allocating + memory or otherwise escaping the type system, but use it carefully. + ``reinterpret_cast`` is considered to be an obvious enough sign of taking + responsibility for any problems. + +It is undefined behavior to access an ownership-qualified object through an +lvalue of a differently-qualified type, except that any non-``__weak`` object +may be read through an ``__unsafe_unretained`` lvalue. + +It is undefined behavior if a managed operation is performed on a ``__strong`` +or ``__weak`` object without a guarantee that it contains a primitive zero +bit-pattern, or if the storage for such an object is freed or reused without the +object being first assigned a null pointer. + +.. admonition:: Rationale + + ARC cannot differentiate between an assignment operator which is intended to + "initialize" dynamic memory and one which is intended to potentially replace + a value. Therefore the object's pointer must be valid before letting ARC at + it. Similarly, C and Objective-C do not provide any language hooks for + destroying objects held in dynamic memory, so it is the programmer's + responsibility to avoid leaks (``__strong`` objects) and consistency errors + (``__weak`` objects). + +These requirements are followed automatically in Objective-C++ when creating +objects of retainable object owner type with ``new`` or ``new[]`` and destroying +them with ``delete``, ``delete[]``, or a pseudo-destructor expression. 
Note +that arrays of nontrivially-ownership-qualified type are not ABI compatible with +non-ARC code because the element type is non-POD: such arrays that are +``new[]``'d in ARC translation units cannot be ``delete[]``'d in non-ARC +translation units and vice-versa. + +.. _arc.ownership.restrictions.pass_by_writeback: + +Passing to an out parameter by writeback +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If the argument passed to a parameter of type ``T __autoreleasing *`` has type +``U oq *``, where ``oq`` is an ownership qualifier, then the argument is a +candidate for :arc-term:`pass-by-writeback` if: + +* ``oq`` is ``__strong`` or ``__weak``, and +* it would be legal to initialize a ``T __strong *`` with a ``U __strong *``. + +For purposes of overload resolution, an implicit conversion sequence requiring +a pass-by-writeback is always worse than an implicit conversion sequence not +requiring a pass-by-writeback. + +The pass-by-writeback is ill-formed if the argument expression does not have a +legal form: + +* ``&var``, where ``var`` is a scalar variable of automatic storage duration + with retainable object pointer type +* a conditional expression where the second and third operands are both legal + forms +* a cast whose operand is a legal form +* a null pointer constant + +.. admonition:: Rationale + + The restriction in the form of the argument serves two purposes. First, it + makes it impossible to pass the address of an array to the argument, which + serves to protect against an otherwise serious risk of mis-inferring an + "array" argument as an out-parameter. Second, it makes it much less likely + that the user will see confusing aliasing problems due to the implementation, + below, where their store to the writeback temporary is not immediately seen + in the original argument variable. + +A pass-by-writeback is evaluated as follows: + +#. The argument is evaluated to yield a pointer ``p`` of type ``U oq *``. +#. 
If ``p`` is a null pointer, then a null pointer is passed as the argument, + and no further work is required for the pass-by-writeback. +#. Otherwise, a temporary of type ``T __autoreleasing`` is created and + initialized to a null pointer. +#. If the parameter is not an Objective-C method parameter marked ``out``, + then ``*p`` is read, and the result is written into the temporary with + primitive semantics. +#. The address of the temporary is passed as the argument to the actual call. +#. After the call completes, the temporary is loaded with primitive + semantics, and that value is assigned into ``*p``. + +.. admonition:: Rationale + + This is all admittedly convoluted. In an ideal world, we would see that a + local variable is being passed to an out-parameter and retroactively modify + its type to be ``__autoreleasing`` rather than ``__strong``. This would be + remarkably difficult and not always well-founded under the C type system. + However, it was judged unacceptably invasive to require programmers to write + ``__autoreleasing`` on all the variables they intend to use for + out-parameters. This was the least bad solution. + +.. _arc.ownership.restrictions.records: + +Ownership-qualified fields of structs and unions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A program is ill-formed if it declares a member of a C struct or union to have +a nontrivially ownership-qualified type. + +.. admonition:: Rationale + + The resulting type would be non-POD in the C++ sense, but C does not give us + very good language tools for managing the lifetime of aggregates, so it is + more convenient to simply forbid them. It is still possible to manage this + with a ``void*`` or an ``__unsafe_unretained`` object. + +This restriction does not apply in Objective-C++. 
However, nontrivially +ownership-qualified types are considered non-POD: in C++11 terms, they are not +trivially default constructible, copy constructible, move constructible, copy +assignable, move assignable, or destructible. It is a violation of C++'s One +Definition Rule to use a class outside of ARC that, under ARC, would have a +nontrivially ownership-qualified member. + +.. admonition:: Rationale + + Unlike in C, we can express all the necessary ARC semantics for + ownership-qualified subobjects as suboperations of the (default) special + member functions for the class. These functions then become non-trivial. + This has the non-obvious result that the class will have a non-trivial copy + constructor and non-trivial destructor; if this would not normally be true + outside of ARC, objects of the type will be passed and returned in an + ABI-incompatible manner. + +.. _arc.ownership.inference: + +Ownership inference +------------------- + +.. _arc.ownership.inference.variables: + +Objects +^^^^^^^ + +If an object is declared with retainable object owner type, but without an +explicit ownership qualifier, its type is implicitly adjusted to have +``__strong`` qualification. + +As a special case, if the object's base type is ``Class`` (possibly +protocol-qualified), the type is adjusted to have ``__unsafe_unretained`` +qualification instead. + +.. _arc.ownership.inference.indirect_parameters: + +Indirect parameters +^^^^^^^^^^^^^^^^^^^ + +If a function or method parameter has type ``T*``, where ``T`` is an +ownership-unqualified retainable object pointer type, then: + +* if ``T`` is ``const``-qualified or ``Class``, then it is implicitly + qualified with ``__unsafe_unretained``; +* otherwise, it is implicitly qualified with ``__autoreleasing``. + +.. admonition:: Rationale + + ``__autoreleasing`` exists mostly for this case, the Cocoa convention for + out-parameters. 
Since a pointer to ``const`` is obviously not an + out-parameter, we instead use a type more useful for passing arrays. If the + user instead intends to pass in a *mutable* array, inferring + ``__autoreleasing`` is the wrong thing to do; this directs some of the + caution in the following rules about writeback. + +Such a type written anywhere else would be ill-formed by the general rule +requiring ownership qualifiers. + +This rule does not apply in Objective-C++ if a parameter's type is dependent in +a template pattern and is only *instantiated* to a type which would be a +pointer to an unqualified retainable object pointer type. Such code is still +ill-formed. + +.. admonition:: Rationale + + The convention is very unlikely to be intentional in template code. + +.. _arc.ownership.inference.template.arguments: + +Template arguments +^^^^^^^^^^^^^^^^^^ + +If a template argument for a template type parameter is a retainable object +owner type that does not have an explicit ownership qualifier, it is adjusted +to have ``__strong`` qualification. This adjustment occurs regardless of +whether the template argument was deduced or explicitly specified. + +.. admonition:: Rationale + + ``__strong`` is a useful default for containers (e.g., ``std::vector<id>``), + which would otherwise require explicit qualification. Moreover, unqualified + retainable object pointer types are unlikely to be useful within templates, + since they generally need to have a qualifier applied to them before being + used. + +.. _arc.method-families: + +Method families +=============== + +An Objective-C method may fall into a :arc-term:`method family`, which is a +conventional set of behaviors ascribed to it by the Cocoa conventions. 
+ +A method is in a certain method family if: + +* it has an ``objc_method_family`` attribute placing it in that family; or if + not that, +* it does not have an ``objc_method_family`` attribute placing it in a + different or no family, and +* its selector falls into the corresponding selector family, and +* its signature obeys the added restrictions of the method family. + +A selector is in a certain selector family if, ignoring any leading +underscores, the first component of the selector either consists entirely of +the name of the method family or it begins with that name followed by a +character other than a lowercase letter. For example, ``_perform:with:`` and +``performWith:`` would fall into the ``perform`` family (if we recognized one), +but ``performing:with`` would not. + +The families and their added restrictions are: + +* ``alloc`` methods must return a retainable object pointer type. +* ``copy`` methods must return a retainable object pointer type. +* ``mutableCopy`` methods must return a retainable object pointer type. +* ``new`` methods must return a retainable object pointer type. +* ``init`` methods must be instance methods and must return an Objective-C + pointer type. Additionally, a program is ill-formed if it declares or + contains a call to an ``init`` method whose return type is neither ``id`` nor + a pointer to a super-class or sub-class of the declaring class (if the method + was declared on a class) or the static receiver type of the call (if it was + declared on a protocol). + + .. admonition:: Rationale + + There are a fair number of existing methods with ``init``-like selectors + which nonetheless don't follow the ``init`` conventions. Typically these + are either accidental naming collisions or helper methods called during + initialization. Because of the peculiar retain/release behavior of + ``init`` methods, it's very important not to treat these methods as + ``init`` methods if they aren't meant to be. 
It was felt that implicitly + defining these methods out of the family based on the exact relationship + between the return type and the declaring class would be much too subtle + and fragile. Therefore we identify a small number of legitimate-seeming + return types and call everything else an error. This serves the secondary + purpose of encouraging programmers not to accidentally give methods names + in the ``init`` family. + + Note that a method with an ``init``-family selector which returns a + non-Objective-C type (e.g. ``void``) is perfectly well-formed; it simply + isn't in the ``init`` family. + +A program is ill-formed if a method's declarations, implementations, and +overrides do not all have the same method family. + +.. _arc.family.attribute: + +Explicit method family control +------------------------------ + +A method may be annotated with the ``objc_method_family`` attribute to +precisely control which method family it belongs to. If a method in an +``@implementation`` does not have this attribute, but there is a method +declared in the corresponding ``@interface`` that does, then the attribute is +copied to the declaration in the ``@implementation``. The attribute is +available outside of ARC, and may be tested for with the preprocessor query +``__has_attribute(objc_method_family)``. + +The attribute is spelled +``__attribute__((objc_method_family(`` *family* ``)))``. If *family* is +``none``, the method has no family, even if it would otherwise be considered to +have one based on its selector and type. Otherwise, *family* must be one of +``alloc``, ``copy``, ``init``, ``mutableCopy``, or ``new``, in which case the +method is considered to belong to the corresponding family regardless of its +selector. It is an error if a method that is explicitly added to a family in +this way does not meet the requirements of the family other than the selector +naming convention. + +.. 
admonition:: Rationale + + The rules codified in this document describe the standard conventions of + Objective-C. However, as these conventions have not heretofore been enforced + by an unforgiving mechanical system, they are only imperfectly kept, + especially as they haven't always even been precisely defined. While it is + possible to define low-level ownership semantics with attributes like + ``ns_returns_retained``, this attribute allows the user to communicate + semantic intent, which is of use both to ARC (which, e.g., treats calls to + ``init`` specially) and the static analyzer. + +.. _arc.family.semantics: + +Semantics of method families +---------------------------- + +A method's membership in a method family may imply non-standard semantics for +its parameters and return type. + +Methods in the ``alloc``, ``copy``, ``mutableCopy``, and ``new`` families --- +that is, methods in all the currently-defined families except ``init`` --- +implicitly :ref:`return a retained object +<arc.object.operands.retained-return-values>` as if they were annotated with +the ``ns_returns_retained`` attribute. This can be overridden by annotating +the method with either of the ``ns_returns_autoreleased`` or +``ns_returns_not_retained`` attributes. + +Properties also follow the same naming rules as methods. This means that those in +the ``alloc``, ``copy``, ``mutableCopy``, and ``new`` families provide access +to :ref:`retained objects <arc.object.operands.retained-return-values>`. This +can be overridden by annotating the property with the ``ns_returns_not_retained`` +attribute. + +.. _arc.family.semantics.init: + +Semantics of ``init`` +^^^^^^^^^^^^^^^^^^^^^ + +Methods in the ``init`` family implicitly :ref:`consume +<arc.objects.operands.consumed>` their ``self`` parameter and :ref:`return a +retained object <arc.object.operands.retained-return-values>`. Neither of +these properties can be altered through attributes. 
+ +A call to an ``init`` method with a receiver that is either ``self`` (possibly +parenthesized or casted) or ``super`` is called a :arc-term:`delegate init +call`. It is an error for a delegate init call to be made except from an +``init`` method, and excluding blocks within such methods. + +As an exception to the :ref:`usual rule <arc.misc.self>`, the variable ``self`` +is mutable in an ``init`` method and has the usual semantics for a ``__strong`` +variable. However, it is undefined behavior and the program is ill-formed, no +diagnostic required, if an ``init`` method attempts to use the previous value +of ``self`` after the completion of a delegate init call. It is conventional, +but not required, for an ``init`` method to return ``self``. + +It is undefined behavior for a program to cause two or more calls to ``init`` +methods on the same object, except that each ``init`` method invocation may +perform at most one delegate init call. + +.. _arc.family.semantics.result_type: + +Related result types +^^^^^^^^^^^^^^^^^^^^ + +Certain methods are candidates to have :arc-term:`related result types`: + +* class methods in the ``alloc`` and ``new`` method families +* instance methods in the ``init`` family +* the instance method ``self`` +* outside of ARC, the instance methods ``retain`` and ``autorelease`` + +If the formal result type of such a method is ``id`` or protocol-qualified +``id``, or a type equal to the declaring class or a superclass, then it is said +to have a related result type. In this case, when invoked in an explicit +message send, it is assumed to return a type related to the type of the +receiver: + +* if it is a class method, and the receiver is a class name ``T``, the message + send expression has type ``T*``; otherwise +* if it is an instance method, and the receiver has type ``T``, the message + send expression has type ``T``; otherwise +* the message send expression has the normal result type of the method. 
+ +This is a new rule of the Objective-C language and applies outside of ARC. + +.. admonition:: Rationale + + ARC's automatic code emission is more prone than most code to signature + errors, i.e. errors where a call was emitted against one method signature, + but the implementing method has an incompatible signature. Having more + precise type information helps drastically lower this risk, as well as + catching a number of latent bugs. + +.. _arc.optimization: + +Optimization +============ + +Within this section, the word :arc-term:`function` will be used to +refer to any structured unit of code, be it a C function, an +Objective-C method, or a block. + +This specification describes ARC as performing specific ``retain`` and +``release`` operations on retainable object pointers at specific +points during the execution of a program. These operations make up a +non-contiguous subsequence of the computation history of the program. +The portion of this sequence for a particular retainable object +pointer for which a specific function execution is directly +responsible is the :arc-term:`formal local retain history` of the +object pointer. The corresponding actual sequence executed is the +:arc-term:`dynamic local retain history`. + +However, under certain circumstances, ARC is permitted to re-order and +eliminate operations in a manner which may alter the overall +computation history beyond what is permitted by the general "as if" +rule of C/C++ and the :ref:`restrictions <arc.objects.retains>` on +the implementation of ``retain`` and ``release``. + +.. admonition:: Rationale + + Specifically, ARC is sometimes permitted to optimize ``release`` + operations in ways which might cause an object to be deallocated + before it would otherwise be. Without this, it would be almost + impossible to eliminate any ``retain``/``release`` pairs. For + example, consider the following code: + + .. 
code-block:: objc + + id x = _ivar; + [x foo]; + + If we were not permitted in any event to shorten the lifetime of the + object in ``x``, then we would not be able to eliminate this retain + and release unless we could prove that the message send could not + modify ``_ivar`` (or deallocate ``self``). Since message sends are + opaque to the optimizer, this is not possible, and so ARC's hands + would be almost completely tied. + +ARC makes no guarantees about the execution of a computation history +which contains undefined behavior. In particular, ARC makes no +guarantees in the presence of race conditions. + +ARC may assume that any retainable object pointers it receives or +generates are instantaneously valid from that point until a point +which, by the concurrency model of the host language, happens-after +the generation of the pointer and happens-before a release of that +object (possibly via an aliasing pointer or indirectly due to +destruction of a different object). + +.. admonition:: Rationale + + There is very little point in trying to guarantee correctness in the + presence of race conditions. ARC does not have a stack-scanning + garbage collector, and guaranteeing the atomicity of every load and + store operation would be prohibitive and preclude a vast amount of + optimization. + +ARC may assume that non-ARC code engages in sensible balancing +behavior and does not rely on exact or minimum retain count values +except as guaranteed by ``__strong`` object invariants or +1 transfer +conventions. For example, if an object is provably double-retained +and double-released, ARC may eliminate the inner retain and release; +it does not need to guard against code which performs an unbalanced +release followed by a "balancing" retain. + +.. 
_arc.optimization.liveness: + +Object liveness +--------------- + +ARC may not allow a retainable object ``X`` to be deallocated at a +time ``T`` in a computation history if: + +* ``X`` is the value stored in a ``__strong`` object ``S`` with + :ref:`precise lifetime semantics <arc.optimization.precise>`, or + +* ``X`` is the value stored in a ``__strong`` object ``S`` with + imprecise lifetime semantics and, at some point after ``T`` but + before the next store to ``S``, the computation history features a + load from ``S`` and in some way depends on the value loaded, or + +* ``X`` is a value described as being released at the end of the + current full-expression and, at some point after ``T`` but before + the end of the full-expression, the computation history depends + on that value. + +.. admonition:: Rationale + + The intent of the second rule is to say that objects held in normal + ``__strong`` local variables may be released as soon as the value in + the variable is no longer being used: either the variable stops + being used completely or a new value is stored in the variable. + + The intent of the third rule is to say that return values may be + released after they've been used. + +A computation history depends on a pointer value ``P`` if it: + +* performs a pointer comparison with ``P``, +* loads from ``P``, +* stores to ``P``, +* depends on a pointer value ``Q`` derived via pointer arithmetic + from ``P`` (including an instance-variable or field access), or +* depends on a pointer value ``Q`` loaded from ``P``. + +Dependency applies only to values derived directly or indirectly from +a particular expression result and does not occur merely because a +separate pointer value dynamically aliases ``P``. Furthermore, this +dependency is not carried by values that are stored to objects. + +.. admonition:: Rationale + + The restrictions on dependency are intended to make this analysis + feasible by an optimizer with only incomplete information about a + program. 
Essentially, dependence is carried to "obvious" uses of a + pointer. Merely passing a pointer argument to a function does not + itself cause dependence, but since generally the optimizer will not + be able to prove that the function doesn't depend on that parameter, + it will be forced to conservatively assume it does. + + Dependency propagates to values loaded from a pointer because those + values might be invalidated by deallocating the object. For + example, given the code ``__strong id x = p->ivar;``, ARC must not + move the release of ``p`` to between the load of ``p->ivar`` and the + retain of that value for storing into ``x``. + + Dependency does not propagate through stores of dependent pointer + values because doing so would allow dependency to outlive the + full-expression which produced the original value. For example, the + address of an instance variable could be written to some global + location and then freely accessed during the lifetime of the local, + or a function could return an inner pointer of an object and store + it to a local. These cases would be potentially impossible to + reason about and so would basically prevent any optimizations based + on imprecise lifetime. These are also uncommon enough to make it + reasonable to require the precise-lifetime annotation if someone + really wants to rely on them. + + Dependency does propagate through return values of pointer type. + The compelling source of need for this rule is a property accessor + which returns an un-autoreleased result; the calling function must + have the chance to operate on the value, e.g. to retain it, before + ARC releases the original pointer. Note again, however, that + dependence does not survive a store, so ARC does not guarantee the + continued validity of the return value past the end of the + full-expression. + +.. 
_arc.optimization.object_lifetime: + +No object lifetime extension +---------------------------- + +If, in the formal computation history of the program, an object ``X`` +has been deallocated by the time of an observable side-effect, then +ARC must cause ``X`` to be deallocated by no later than the occurrence +of that side-effect, except as influenced by the re-ordering of the +destruction of objects. + +.. admonition:: Rationale + + This rule is intended to prohibit ARC from observably extending the + lifetime of a retainable object, other than as specified in this + document. Together with the rule limiting the transformation of + releases, this rule requires ARC to eliminate retains and releases + only in pairs. + + ARC's power to reorder the destruction of objects is critical to its + ability to do any optimization, for essentially the same reason that + it must retain the power to decrease the lifetime of an object. + Unfortunately, while it's generally poor style for the destruction + of objects to have arbitrary side-effects, it's certainly possible. + Hence the caveat. + +.. _arc.optimization.precise: + +Precise lifetime semantics +-------------------------- + +In general, ARC maintains an invariant that a retainable object pointer held in +a ``__strong`` object will be retained for the full formal lifetime of the +object. Objects subject to this invariant have :arc-term:`precise lifetime +semantics`. + +By default, local variables of automatic storage duration do not have precise +lifetime semantics. Such objects are simply strong references which hold +values of retainable object pointer type, and these values are still fully +subject to the optimizations on values under local control. + +.. admonition:: Rationale + + Applying these precise-lifetime semantics strictly would be prohibitive. + Many useful optimizations that might theoretically decrease the lifetime of + an object would be rendered impossible. Essentially, it promises too much. 
+ +A local variable of retainable object owner type and automatic storage duration +may be annotated with the ``objc_precise_lifetime`` attribute to indicate that +it should be considered to be an object with precise lifetime semantics. + +.. admonition:: Rationale + + Nonetheless, it is sometimes useful to be able to force an object to be + released at a precise time, even if that object does not appear to be used. + This is likely to be uncommon enough that the syntactic weight of explicitly + requesting these semantics will not be burdensome, and may even make the code + clearer. + +.. _arc.misc: + +Miscellaneous +============= + +.. _arc.misc.special_methods: + +Special methods +--------------- + +.. _arc.misc.special_methods.retain: + +Memory management methods +^^^^^^^^^^^^^^^^^^^^^^^^^ + +A program is ill-formed if it contains a method definition, message send, or +``@selector`` expression for any of the following selectors: + +* ``autorelease`` +* ``release`` +* ``retain`` +* ``retainCount`` + +.. admonition:: Rationale + + ``retainCount`` is banned because ARC robs it of consistent semantics. The + others were banned after weighing three options for how to deal with message + sends: + + **Honoring** them would work out very poorly if a programmer naively or + accidentally tried to incorporate code written for manual retain/release code + into an ARC program. At best, such code would do twice as much work as + necessary; quite frequently, however, ARC and the explicit code would both + try to balance the same retain, leading to crashes. The cost is losing the + ability to perform "unrooted" retains, i.e. retains not logically + corresponding to a strong reference in the object graph. + + **Ignoring** them would badly violate user expectations about their code. + While it *would* make it easier to develop code simultaneously for ARC and + non-ARC, there is very little reason to do so except for certain library + developers. 
ARC and non-ARC translation units share an execution model and + can seamlessly interoperate. Within a translation unit, a developer who + faithfully maintains their code in non-ARC mode is suffering all the + restrictions of ARC for zero benefit, while a developer who isn't testing the + non-ARC mode is likely to be unpleasantly surprised if they try to go back to + it. + + **Banning** them has the disadvantage of making it very awkward to migrate + existing code to ARC. The best answer to that, given a number of other + changes and restrictions in ARC, is to provide a specialized tool to assist + users in that migration. + + Implementing these methods was banned because they are too integral to the + semantics of ARC; many tricks which worked tolerably under manual reference + counting will misbehave if ARC performs an ephemeral extra retain or two. If + absolutely required, it is still possible to implement them in non-ARC code, + for example in a category; the implementations must obey the :ref:`semantics + <arc.objects.retains>` laid out elsewhere in this document. + +.. _arc.misc.special_methods.dealloc: + +``dealloc`` +^^^^^^^^^^^ + +A program is ill-formed if it contains a message send or ``@selector`` +expression for the selector ``dealloc``. + +.. admonition:: Rationale + + There are no legitimate reasons to call ``dealloc`` directly. + +A class may provide a method definition for an instance method named +``dealloc``. This method will be called after the final ``release`` of the +object but before it is deallocated or any of its instance variables are +destroyed. The superclass's implementation of ``dealloc`` will be called +automatically when the method returns. + +.. admonition:: Rationale + + Even though ARC destroys instance variables automatically, there are still + legitimate reasons to write a ``dealloc`` method, such as freeing + non-retainable resources. Failing to call ``[super dealloc]`` in such a + method is nearly always a bug. 
Sometimes, the object is simply trying to + prevent itself from being destroyed, but ``dealloc`` is really far too late + for the object to be raising such objections. Somewhat more legitimately, an + object may have been pool-allocated and should not be deallocated with + ``free``; for now, this can only be supported with a ``dealloc`` + implementation outside of ARC. Such an implementation must be very careful + to do all the other work that ``NSObject``'s ``dealloc`` would, which is + outside the scope of this document to describe. + +The instance variables for an ARC-compiled class will be destroyed at some +point after control enters the ``dealloc`` method for the root class of the +class. The ordering of the destruction of instance variables is unspecified, +both within a single class and between subclasses and superclasses. + +.. admonition:: Rationale + + The traditional, non-ARC pattern for destroying instance variables is to + destroy them immediately before calling ``[super dealloc]``. Unfortunately, + message sends from the superclass are quite capable of reaching methods in + the subclass, and those methods may well read or write to those instance + variables. Making such message sends from dealloc is generally discouraged, + since the subclass may well rely on other invariants that were broken during + ``dealloc``, but it's not so inescapably dangerous that we felt comfortable + calling it undefined behavior. Therefore we chose to delay destroying the + instance variables to a point at which message sends are clearly disallowed: + the point at which the root class's deallocation routines take over. + + In most code, the difference is not observable. It can, however, be observed + if an instance variable holds a strong reference to an object whose + deallocation will trigger a side-effect which must be carefully ordered with + respect to the destruction of the super class. 
Such code violates the design + principle that semantically important behavior should be explicit. A simple + fix is to clear the instance variable manually during ``dealloc``; a more + holistic solution is to move semantically important side-effects out of + ``dealloc`` and into a separate teardown phase which can rely on working with + well-formed objects. + +.. _arc.misc.autoreleasepool: + +``@autoreleasepool`` +-------------------- + +To simplify the use of autorelease pools, and to bring them under the control +of the compiler, a new kind of statement is available in Objective-C. It is +written ``@autoreleasepool`` followed by a *compound-statement*, i.e. by a new +scope delimited by curly braces. Upon entry to this block, the current state +of the autorelease pool is captured. When the block is exited normally, +whether by fallthrough or directed control flow (such as ``return`` or +``break``), the autorelease pool is restored to the saved state, releasing all +the objects in it. When the block is exited with an exception, the pool is not +drained. + +``@autoreleasepool`` may be used in non-ARC translation units, with equivalent +semantics. + +A program is ill-formed if it refers to the ``NSAutoreleasePool`` class. + +.. admonition:: Rationale + + Autorelease pools are clearly important for the compiler to reason about, but + it is far too much to expect the compiler to accurately reason about control + dependencies between two calls. It is also very easy to accidentally forget + to drain an autorelease pool when using the manual API, and this can + significantly inflate the process's high-water-mark. The introduction of a + new scope is unfortunate but basically required for sane interaction with the + rest of the language. Not draining the pool during an unwind is apparently + required by the Objective-C exceptions implementation. + +.. 
_arc.misc.self: + +``self`` +-------- + +The ``self`` parameter variable of an Objective-C method is never actually +retained by the implementation. It is undefined behavior, or at least +dangerous, to cause an object to be deallocated during a message send to that +object. + +To make this safe, for Objective-C instance methods ``self`` is implicitly +``const`` unless the method is in the :ref:`init family +<arc.family.semantics.init>`. Further, ``self`` is **always** implicitly +``const`` within a class method. + +.. admonition:: Rationale + + The cost of retaining ``self`` in all methods was found to be prohibitive, as + it tends to be live across calls, preventing the optimizer from proving that + the retain and release are unnecessary --- for good reason, as it's quite + possible in theory to cause an object to be deallocated during its execution + without this retain and release. Since it's extremely uncommon to actually + do so, even unintentionally, and since there's no natural way for the + programmer to remove this retain/release pair otherwise (as there is for + other parameters by, say, making the variable ``__unsafe_unretained``), we + chose to make this optimizing assumption and shift some amount of risk to the + user. + +.. _arc.misc.enumeration: + +Fast enumeration iteration variables +------------------------------------ + +If a variable is declared in the condition of an Objective-C fast enumeration +loop, and the variable has no explicit ownership qualifier, then it is +qualified with ``const __strong`` and objects encountered during the +enumeration are not actually retained. + +.. admonition:: Rationale + + This is an optimization made possible because fast enumeration loops promise + to keep the objects retained during enumeration, and the collection itself + cannot be synchronously modified. 
It can be overridden by explicitly + qualifying the variable with ``__strong``, which will make the variable + mutable again and cause the loop to retain the objects it encounters. + +.. _arc.misc.blocks: + +Blocks +------ + +The implicit ``const`` capture variables created when evaluating a block +literal expression have the same ownership semantics as the local variables +they capture. The capture is performed by reading from the captured variable +and initializing the capture variable with that value; the capture variable is +destroyed when the block literal is, i.e. at the end of the enclosing scope. + +The :ref:`inference <arc.ownership.inference>` rules apply equally to +``__block`` variables, which is a shift in semantics from non-ARC, where +``__block`` variables did not implicitly retain during capture. + +``__block`` variables of retainable object owner type are moved off the stack +by initializing the heap copy with the result of moving from the stack copy. + +With the exception of retains done as part of initializing a ``__strong`` +parameter variable or reading a ``__weak`` variable, whenever these semantics +call for retaining a value of block-pointer type, it has the effect of a +``Block_copy``. The optimizer may remove such copies when it sees that the +result is used only as an argument to a call. + +.. _arc.misc.exceptions: + +Exceptions +---------- + +By default in Objective C, ARC is not exception-safe for normal releases: + +* It does not end the lifetime of ``__strong`` variables when their scopes are + abnormally terminated by an exception. +* It does not perform releases which would occur at the end of a + full-expression if that full-expression throws an exception. + +A program may be compiled with the option ``-fobjc-arc-exceptions`` in order to +enable these, or with the option ``-fno-objc-arc-exceptions`` to explicitly +disable them, with the last such argument "winning". + +.. 
admonition:: Rationale + + The standard Cocoa convention is that exceptions signal programmer error and + are not intended to be recovered from. Making code exceptions-safe by + default would impose severe runtime and code size penalties on code that + typically does not actually care about exceptions safety. Therefore, + ARC-generated code leaks by default on exceptions, which is just fine if the + process is going to be immediately terminated anyway. Programs which do care + about recovering from exceptions should enable the option. + +In Objective-C++, ``-fobjc-arc-exceptions`` is enabled by default. + +.. admonition:: Rationale + + C++ already introduces pervasive exceptions-cleanup code of the sort that ARC + introduces. C++ programmers who have not already disabled exceptions are + much more likely to actually require exception-safety. + +ARC does end the lifetimes of ``__weak`` objects when an exception terminates +their scope unless exceptions are disabled in the compiler. + +.. admonition:: Rationale + + The consequence of a local ``__weak`` object not being destroyed is very + likely to be corruption of the Objective-C runtime, so we want to be safer + here. Of course, potentially massive leaks are about as likely to take down + the process as this corruption is if the program does try to recover from + exceptions. + +.. _arc.misc.interior: + +Interior pointers +----------------- + +An Objective-C method returning a non-retainable pointer may be annotated with +the ``objc_returns_inner_pointer`` attribute to indicate that it returns a +handle to the internal data of an object, and that this reference will be +invalidated if the object is destroyed. When such a message is sent to an +object, the object's lifetime will be extended until at least the earliest of: + +* the last use of the returned pointer, or any pointer derived from it, in the + calling function or +* the autorelease pool is restored to a previous state. + +.. 
admonition:: Rationale + + Rationale: not all memory and resources are managed with reference counts; it + is common for objects to manage private resources in their own, private way. + Typically these resources are completely encapsulated within the object, but + some classes offer their users direct access for efficiency. If ARC is not + aware of methods that return such "interior" pointers, its optimizations can + cause the owning object to be reclaimed too soon. This attribute informs ARC + that it must tread lightly. + + The extension rules are somewhat intentionally vague. The autorelease pool + limit is there to permit a simple implementation to simply retain and + autorelease the receiver. The other limit permits some amount of + optimization. The phrase "derived from" is intended to encompass the results + both of pointer transformations, such as casts and arithmetic, and of loading + from such derived pointers; furthermore, it applies whether or not such + derivations are applied directly in the calling code or by other utility code + (for example, the C library routine ``strchr``). However, the implementation + never need account for uses after a return from the code which calls the + method returning an interior pointer. + +As an exception, no extension is required if the receiver is loaded directly +from a ``__strong`` object with :ref:`precise lifetime semantics +<arc.optimization.precise>`. + +.. admonition:: Rationale + + Implicit autoreleases carry the risk of significantly inflating memory use, + so it's important to provide users a way of avoiding these autoreleases. + Tying this to precise lifetime semantics is ideal, as for local variables + this requires a very explicit annotation, which allows ARC to trust the user + with good cheer. + +.. 
_arc.misc.c-retainable: + +C retainable pointer types +-------------------------- + +A type is a :arc-term:`C retainable pointer type` if it is a pointer to +(possibly qualified) ``void`` or a pointer to a (possibly qualified) ``struct`` +or ``class`` type. + +.. admonition:: Rationale + + ARC does not manage pointers of CoreFoundation type (or any of the related + families of retainable C pointers which interoperate with Objective-C for + retain/release operation). In fact, ARC does not even know how to + distinguish these types from arbitrary C pointer types. The intent of this + concept is to filter out some obviously non-object types while leaving a hook + for later tightening if a means of exhaustively marking CF types is made + available. + +.. _arc.misc.c-retainable.audit: + +Auditing of C retainable pointer interfaces +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:when-revised:`[beginning Apple 4.0, LLVM 3.1]` + +A C function may be marked with the ``cf_audited_transfer`` attribute to +express that, except as otherwise marked with attributes, it obeys the +parameter (consuming vs. non-consuming) and return (retained vs. non-retained) +conventions for a C function of its name, namely: + +* A parameter of C retainable pointer type is assumed to not be consumed + unless it is marked with the ``cf_consumed`` attribute, and +* A result of C retainable pointer type is assumed to not be returned retained + unless the function is either marked ``cf_returns_retained`` or it follows + the create/copy naming convention and is not marked + ``cf_returns_not_retained``. + +A function obeys the :arc-term:`create/copy` naming convention if its name +contains as a substring: + +* either "Create" or "Copy" not followed by a lowercase letter, or +* either "create" or "copy" not followed by a lowercase letter and + not preceded by any letter, whether uppercase or lowercase. 
+ +A second attribute, ``cf_unknown_transfer``, signifies that a function's +transfer semantics cannot be accurately captured using any of these +annotations. A program is ill-formed if it annotates the same function with +both ``cf_audited_transfer`` and ``cf_unknown_transfer``. + +A pragma is provided to facilitate the mass annotation of interfaces: + +.. code-block:: objc + + #pragma clang arc_cf_code_audited begin + ... + #pragma clang arc_cf_code_audited end + +All C functions declared within the extent of this pragma are treated as if +annotated with the ``cf_audited_transfer`` attribute unless they otherwise have +the ``cf_unknown_transfer`` attribute. The pragma is accepted in all language +modes. A program is ill-formed if it attempts to change files, whether by +including a file or ending the current file, within the extent of this pragma. + +It is possible to test for all the features in this section with +``__has_feature(arc_cf_code_audited)``. + +.. admonition:: Rationale + + A significant inconvenience in ARC programming is the necessity of + interacting with APIs based around C retainable pointers. These features are + designed to make it relatively easy for API authors to quickly review and + annotate their interfaces, in turn improving the fidelity of tools such as + the static analyzer and ARC. The single-file restriction on the pragma is + designed to eliminate the risk of accidentally annotating some other header's + interfaces. + +.. _arc.runtime: + +Runtime support +=============== + +This section describes the interaction between the ARC runtime and the code +generated by the ARC compiler. This is not part of the ARC language +specification; instead, it is effectively a language-specific ABI supplement, +akin to the "Itanium" generic ABI for C++. + +Ownership qualification does not alter the storage requirements for objects, +except that it is undefined behavior if a ``__weak`` object is inadequately +aligned for an object of type ``id``. 
The other qualifiers may be used on +explicitly under-aligned memory. + +The runtime tracks ``__weak`` objects which hold non-null values. It is +undefined behavior to directly modify a ``__weak`` object which is being tracked +by the runtime except through an +:ref:`objc_storeWeak <arc.runtime.objc_storeWeak>`, +:ref:`objc_destroyWeak <arc.runtime.objc_destroyWeak>`, or +:ref:`objc_moveWeak <arc.runtime.objc_moveWeak>` call. + +The runtime must provide a number of new entrypoints which the compiler may +emit, which are described in the remainder of this section. + +.. admonition:: Rationale + + Several of these functions are semantically equivalent to a message send; we + emit calls to C functions instead because: + + * the machine code to do so is significantly smaller, + * it is much easier to recognize the C functions in the ARC optimizer, and + * a sufficiently sophisticated runtime may be able to avoid the message send in + common cases. + + Several other of these functions are "fused" operations which can be + described entirely in terms of other operations. We use the fused operations + primarily as a code-size optimization, although in some cases there is also a + real potential for avoiding redundant operations in the runtime. + +.. _arc.runtime.objc_autorelease: + +``id objc_autorelease(id value);`` +---------------------------------- + +*Precondition:* ``value`` is null or a pointer to a valid object. + +If ``value`` is null, this call has no effect. Otherwise, it adds the object +to the innermost autorelease pool exactly as if the object had been sent the +``autorelease`` message. + +Always returns ``value``. + +.. 
_arc.runtime.objc_autoreleasePoolPop: + +``void objc_autoreleasePoolPop(void *pool);`` +--------------------------------------------- + +*Precondition:* ``pool`` is the result of a previous call to +:ref:`objc_autoreleasePoolPush <arc.runtime.objc_autoreleasePoolPush>` on the +current thread, where neither ``pool`` nor any enclosing pool have previously +been popped. + +Releases all the objects added to the given autorelease pool and any +autorelease pools it encloses, then sets the current autorelease pool to the +pool directly enclosing ``pool``. + +.. _arc.runtime.objc_autoreleasePoolPush: + +``void *objc_autoreleasePoolPush(void);`` +----------------------------------------- + +Creates a new autorelease pool that is enclosed by the current pool, makes that +the current pool, and returns an opaque "handle" to it. + +.. admonition:: Rationale + + While the interface is described as an explicit hierarchy of pools, the rules + allow the implementation to just keep a stack of objects, using the stack + depth as the opaque pool handle. + +.. _arc.runtime.objc_autoreleaseReturnValue: + +``id objc_autoreleaseReturnValue(id value);`` +--------------------------------------------- + +*Precondition:* ``value`` is null or a pointer to a valid object. + +If ``value`` is null, this call has no effect. Otherwise, it makes a best +effort to hand off ownership of a retain count on the object to a call to +:ref:`objc_retainAutoreleasedReturnValue +<arc.runtime.objc_retainAutoreleasedReturnValue>` for the same object in an +enclosing call frame. If this is not possible, the object is autoreleased as +above. + +Always returns ``value``. + +.. _arc.runtime.objc_copyWeak: + +``void objc_copyWeak(id *dest, id *src);`` +------------------------------------------ + +*Precondition:* ``src`` is a valid pointer which either contains a null pointer +or has been registered as a ``__weak`` object. ``dest`` is a valid pointer +which has not been registered as a ``__weak`` object. 
+ +``dest`` is initialized to be equivalent to ``src``, potentially registering it +with the runtime. Equivalent to the following code: + +.. code-block:: objc + + void objc_copyWeak(id *dest, id *src) { + objc_release(objc_initWeak(dest, objc_loadWeakRetained(src))); + } + +Must be atomic with respect to calls to ``objc_storeWeak`` on ``src``. + +.. _arc.runtime.objc_destroyWeak: + +``void objc_destroyWeak(id *object);`` +-------------------------------------- + +*Precondition:* ``object`` is a valid pointer which either contains a null +pointer or has been registered as a ``__weak`` object. + +``object`` is unregistered as a weak object, if it ever was. The current value +of ``object`` is left unspecified; otherwise, equivalent to the following code: + +.. code-block:: objc + + void objc_destroyWeak(id *object) { + objc_storeWeak(object, nil); + } + +Does not need to be atomic with respect to calls to ``objc_storeWeak`` on +``object``. + +.. _arc.runtime.objc_initWeak: + +``id objc_initWeak(id *object, id value);`` +------------------------------------------- + +*Precondition:* ``object`` is a valid pointer which has not been registered as +a ``__weak`` object. ``value`` is null or a pointer to a valid object. + +If ``value`` is a null pointer or the object to which it points has begun +deallocation, ``object`` is zero-initialized. Otherwise, ``object`` is +registered as a ``__weak`` object pointing to ``value``. Equivalent to the +following code: + +.. code-block:: objc + + id objc_initWeak(id *object, id value) { + *object = nil; + return objc_storeWeak(object, value); + } + +Returns the value of ``object`` after the call. + +Does not need to be atomic with respect to calls to ``objc_storeWeak`` on +``object``. + +.. _arc.runtime.objc_loadWeak: + +``id objc_loadWeak(id *object);`` +--------------------------------- + +*Precondition:* ``object`` is a valid pointer which either contains a null +pointer or has been registered as a ``__weak`` object. 
+ +If ``object`` is registered as a ``__weak`` object, and the last value stored +into ``object`` has not yet been deallocated or begun deallocation, retains and +autoreleases that value and returns it. Otherwise returns null. Equivalent to +the following code: + +.. code-block:: objc + + id objc_loadWeak(id *object) { + return objc_autorelease(objc_loadWeakRetained(object)); + } + +Must be atomic with respect to calls to ``objc_storeWeak`` on ``object``. + +.. admonition:: Rationale + + Loading weak references would be inherently prone to race conditions without + the retain. + +.. _arc.runtime.objc_loadWeakRetained: + +``id objc_loadWeakRetained(id *object);`` +----------------------------------------- + +*Precondition:* ``object`` is a valid pointer which either contains a null +pointer or has been registered as a ``__weak`` object. + +If ``object`` is registered as a ``__weak`` object, and the last value stored +into ``object`` has not yet been deallocated or begun deallocation, retains +that value and returns it. Otherwise returns null. + +Must be atomic with respect to calls to ``objc_storeWeak`` on ``object``. + +.. _arc.runtime.objc_moveWeak: + +``void objc_moveWeak(id *dest, id *src);`` +------------------------------------------ + +*Precondition:* ``src`` is a valid pointer which either contains a null pointer +or has been registered as a ``__weak`` object. ``dest`` is a valid pointer +which has not been registered as a ``__weak`` object. + +``dest`` is initialized to be equivalent to ``src``, potentially registering it +with the runtime. ``src`` may then be left in its original state, in which +case this call is equivalent to :ref:`objc_copyWeak +<arc.runtime.objc_copyWeak>`, or it may be left as null. + +Must be atomic with respect to calls to ``objc_storeWeak`` on ``src``. + +.. _arc.runtime.objc_release: + +``void objc_release(id value);`` +-------------------------------- + +*Precondition:* ``value`` is null or a pointer to a valid object. 
+ +If ``value`` is null, this call has no effect. Otherwise, it performs a +release operation exactly as if the object had been sent the ``release`` +message. + +.. _arc.runtime.objc_retain: + +``id objc_retain(id value);`` +----------------------------- + +*Precondition:* ``value`` is null or a pointer to a valid object. + +If ``value`` is null, this call has no effect. Otherwise, it performs a retain +operation exactly as if the object had been sent the ``retain`` message. + +Always returns ``value``. + +.. _arc.runtime.objc_retainAutorelease: + +``id objc_retainAutorelease(id value);`` +---------------------------------------- + +*Precondition:* ``value`` is null or a pointer to a valid object. + +If ``value`` is null, this call has no effect. Otherwise, it performs a retain +operation followed by an autorelease operation. Equivalent to the following +code: + +.. code-block:: objc + + id objc_retainAutorelease(id value) { + return objc_autorelease(objc_retain(value)); + } + +Always returns ``value``. + +.. _arc.runtime.objc_retainAutoreleaseReturnValue: + +``id objc_retainAutoreleaseReturnValue(id value);`` +--------------------------------------------------- + +*Precondition:* ``value`` is null or a pointer to a valid object. + +If ``value`` is null, this call has no effect. Otherwise, it performs a retain +operation followed by the operation described in +:ref:`objc_autoreleaseReturnValue <arc.runtime.objc_autoreleaseReturnValue>`. +Equivalent to the following code: + +.. code-block:: objc + + id objc_retainAutoreleaseReturnValue(id value) { + return objc_autoreleaseReturnValue(objc_retain(value)); + } + +Always returns ``value``. + +.. _arc.runtime.objc_retainAutoreleasedReturnValue: + +``id objc_retainAutoreleasedReturnValue(id value);`` +---------------------------------------------------- + +*Precondition:* ``value`` is null or a pointer to a valid object. + +If ``value`` is null, this call has no effect. 
Otherwise, it attempts to +accept a hand off of a retain count from a call to +:ref:`objc_autoreleaseReturnValue <arc.runtime.objc_autoreleaseReturnValue>` on +``value`` in a recently-called function or something it calls. If that fails, +it performs a retain operation exactly like :ref:`objc_retain +<arc.runtime.objc_retain>`. + +Always returns ``value``. + +.. _arc.runtime.objc_retainBlock: + +``id objc_retainBlock(id value);`` +---------------------------------- + +*Precondition:* ``value`` is null or a pointer to a valid block object. + +If ``value`` is null, this call has no effect. Otherwise, if the block pointed +to by ``value`` is still on the stack, it is copied to the heap and the address +of the copy is returned. Otherwise a retain operation is performed on the +block exactly as if it had been sent the ``retain`` message. + +.. _arc.runtime.objc_storeStrong: + +``id objc_storeStrong(id *object, id value);`` +---------------------------------------------- + +*Precondition:* ``object`` is a valid pointer to a ``__strong`` object which is +adequately aligned for a pointer. ``value`` is null or a pointer to a valid +object. + +Performs the complete sequence for assigning to a ``__strong`` object of +non-block type [*]_. Equivalent to the following code: + +.. code-block:: objc + + id objc_storeStrong(id *object, id value) { + value = [value retain]; + id oldValue = *object; + *object = value; + [oldValue release]; + return value; + } + +Always returns ``value``. + +.. [*] This does not imply that a ``__strong`` object of block type is an + invalid argument to this function. Rather it implies that an ``objc_retain`` + and not an ``objc_retainBlock`` operation will be emitted if the argument is + a block. + +.. 
_arc.runtime.objc_storeWeak: + +``id objc_storeWeak(id *object, id value);`` +-------------------------------------------- + +*Precondition:* ``object`` is a valid pointer which either contains a null +pointer or has been registered as a ``__weak`` object. ``value`` is null or a +pointer to a valid object. + +If ``value`` is a null pointer or the object to which it points has begun +deallocation, ``object`` is assigned null and unregistered as a ``__weak`` +object. Otherwise, ``object`` is registered as a ``__weak`` object or has its +registration updated to point to ``value``. + +Returns the value of ``object`` after the call. + diff --git a/docs/Block-ABI-Apple.rst b/docs/Block-ABI-Apple.rst new file mode 100644 index 0000000..08f3464 --- /dev/null +++ b/docs/Block-ABI-Apple.rst @@ -0,0 +1,935 @@ +================================== +Block Implementation Specification +================================== + +.. contents:: + :local: + +History +======= + +* 2008/7/14 - created. +* 2008/8/21 - revised, C++. +* 2008/9/24 - add ``NULL`` ``isa`` field to ``__block`` storage. +* 2008/10/1 - revise block layout to use a ``static`` descriptor structure. +* 2008/10/6 - revise block layout to use an unsigned long int flags. +* 2008/10/28 - specify use of ``_Block_object_assign`` and + ``_Block_object_dispose`` for all "Object" types in helper functions. +* 2008/10/30 - revise new layout to have invoke function in same place. +* 2008/10/30 - add ``__weak`` support. +* 2010/3/16 - rev for stret return, signature field. +* 2010/4/6 - improved wording. +* 2013/1/6 - improved wording and converted to rst. + +This document describes the Apple ABI implementation specification of Blocks. + +The first shipping version of this ABI is found in Mac OS X 10.6, and shall be +referred to as 10.6.ABI. As of 2010/3/16, the following describes the ABI +contract with the runtime and the compiler, and, as necessary, will be referred +to as ABI.2010.3.16. 
+ +Since the Apple ABI references symbols from other elements of the system, any +attempt to use this ABI on systems prior to SnowLeopard is undefined. + +High Level +========== + +The ABI of ``Blocks`` consists of their layout and the runtime functions required +by the compiler. A ``Block`` consists of a structure of the following form: + +.. code-block:: c + + struct Block_literal_1 { + void *isa; // initialized to &_NSConcreteStackBlock or &_NSConcreteGlobalBlock + int flags; + int reserved; + void (*invoke)(void *, ...); + struct Block_descriptor_1 { + unsigned long int reserved; // NULL + unsigned long int size; // sizeof(struct Block_literal_1) + // optional helper functions + void (*copy_helper)(void *dst, void *src); // IFF (1<<25) + void (*dispose_helper)(void *src); // IFF (1<<25) + // required ABI.2010.3.16 + const char *signature; // IFF (1<<30) + } *descriptor; + // imported variables + }; + +The following flags bits are in use thusly for a possible ABI.2010.3.16: + +.. code-block:: c + + enum { + BLOCK_HAS_COPY_DISPOSE = (1 << 25), + BLOCK_HAS_CTOR = (1 << 26), // helpers have C++ code + BLOCK_IS_GLOBAL = (1 << 28), + BLOCK_HAS_STRET = (1 << 29), // IFF BLOCK_HAS_SIGNATURE + BLOCK_HAS_SIGNATURE = (1 << 30), + }; + +In 10.6.ABI the (1<<29) was usually set and was always ignored by the runtime - +it had been a transitional marker that did not get deleted after the +transition. This bit is now paired with (1<<30), and represented as the pair +(3<<29), for the following combinations of valid bit settings, and their +meanings: + +.. code-block:: c + + switch (flags & (3<<29)) { + case (0<<29): 10.6.ABI, no signature field available + case (1<<29): 10.6.ABI, no signature field available + case (2<<29): ABI.2010.3.16, regular calling convention, presence of signature field + case (3<<29): ABI.2010.3.16, stret calling convention, presence of signature field, + } + +The signature field is not always populated. 
+ +The following discussions are presented as 10.6.ABI otherwise. + +``Block`` literals may occur within functions where the structure is created in +stack local memory. They may also appear as initialization expressions for +``Block`` variables of global or ``static`` local variables. + +When a ``Block`` literal expression is evaluated the stack based structure is +initialized as follows: + +1. A ``static`` descriptor structure is declared and initialized as follows: + + a. The ``invoke`` function pointer is set to a function that takes the + ``Block`` structure as its first argument and the rest of the arguments (if + any) to the ``Block`` and executes the ``Block`` compound statement. + + b. The ``size`` field is set to the size of the following ``Block`` literal + structure. + + c. The ``copy_helper`` and ``dispose_helper`` function pointers are set to + respective helper functions if they are required by the ``Block`` literal. + +2. A stack (or global) ``Block`` literal data structure is created and + initialized as follows: + + a. The ``isa`` field is set to the address of the external + ``_NSConcreteStackBlock``, which is a block of uninitialized memory supplied + in ``libSystem``, or ``_NSConcreteGlobalBlock`` if this is a static or file + level ``Block`` literal. + + b. The ``flags`` field is set to zero unless there are variables imported + into the ``Block`` that need helper functions for program level + ``Block_copy()`` and ``Block_release()`` operations, in which case the + (1<<25) flags bit is set. + +As an example, the ``Block`` literal expression: + +.. code-block:: c + + ^ { printf("hello world\n"); } + +would cause the following to be created on a 32-bit system: + +.. 
code-block:: c + + struct __block_literal_1 { + void *isa; + int flags; + int reserved; + void (*invoke)(struct __block_literal_1 *); + struct __block_descriptor_1 *descriptor; + }; + + void __block_invoke_1(struct __block_literal_1 *_block) { + printf("hello world\n"); + } + + static struct __block_descriptor_1 { + unsigned long int reserved; + unsigned long int Block_size; + } __block_descriptor_1 = { 0, sizeof(struct __block_literal_1), __block_invoke_1 }; + +and where the ``Block`` literal itself appears: + +.. code-block:: c + + struct __block_literal_1 _block_literal = { + &_NSConcreteStackBlock, + (1<<29), <uninitialized>, + __block_invoke_1, + &__block_descriptor_1 + }; + +A ``Block`` imports other ``Block`` references, ``const`` copies of other +variables, and variables marked ``__block``. In Objective-C, variables may +additionally be objects. + +When a ``Block`` literal expression is used as the initial value of a global +or ``static`` local variable, it is initialized as follows: + +.. code-block:: c + + struct __block_literal_1 __block_literal_1 = { + &_NSConcreteGlobalBlock, + (1<<28)|(1<<29), <uninitialized>, + __block_invoke_1, + &__block_descriptor_1 + }; + +that is, a different address is provided as the first value and a particular +(1<<28) bit is set in the ``flags`` field, and otherwise it is the same as for +stack based ``Block`` literals. This is an optimization that can be used for +any ``Block`` literal that imports no ``const`` or ``__block`` storage +variables. + +Imported Variables +================== + +Variables of ``auto`` storage class are imported as ``const`` copies. Variables +of ``__block`` storage class are imported as a pointer to an enclosing data +structure. Global variables are simply referenced and not considered as +imported. + +Imported ``const`` copy variables +--------------------------------- + +Automatic storage variables not marked with ``__block`` are imported as +``const`` copies. 
+ +The simplest example is that of importing a variable of type ``int``: + +.. code-block:: c + + int x = 10; + void (^vv)(void) = ^{ printf("x is %d\n", x); } + x = 11; + vv(); + +which would be compiled to: + +.. code-block:: c + + struct __block_literal_2 { + void *isa; + int flags; + int reserved; + void (*invoke)(struct __block_literal_2 *); + struct __block_descriptor_2 *descriptor; + const int x; + }; + + void __block_invoke_2(struct __block_literal_2 *_block) { + printf("x is %d\n", _block->x); + } + + static struct __block_descriptor_2 { + unsigned long int reserved; + unsigned long int Block_size; + } __block_descriptor_2 = { 0, sizeof(struct __block_literal_2) }; + +and: + +.. code-block:: c + + struct __block_literal_2 __block_literal_2 = { + &_NSConcreteStackBlock, + (1<<29), <uninitialized>, + __block_invoke_2, + &__block_descriptor_2, + x + }; + +In summary, scalars, structures, unions, and function pointers are generally +imported as ``const`` copies with no need for helper functions. + +Imported ``const`` copy of ``Block`` reference +---------------------------------------------- + +The first case where copy and dispose helper functions are required is for the +case of when a ``Block`` itself is imported. In this case both a +``copy_helper`` function and a ``dispose_helper`` function are needed. The +``copy_helper`` function is passed both the existing stack based pointer and the +pointer to the new heap version and should call back into the runtime to +actually do the copy operation on the imported fields within the ``Block``. The +runtime functions are all described in :ref:`RuntimeHelperFunctions`. + +A quick example: + +.. 
code-block:: c + + void (^existingBlock)(void) = ...; + void (^vv)(void) = ^{ existingBlock(); } + vv(); + + struct __block_literal_3 { + ...; // existing block + }; + + struct __block_literal_4 { + void *isa; + int flags; + int reserved; + void (*invoke)(struct __block_literal_4 *); + struct __block_literal_3 *const existingBlock; + }; + + void __block_invoke_4(struct __block_literal_4 *_block) { + _block->existingBlock->invoke(_block->existingBlock); + } + + void __block_copy_4(struct __block_literal_4 *dst, struct __block_literal_4 *src) { + //_Block_copy_assign(&dst->existingBlock, src->existingBlock, 0); + _Block_object_assign(&dst->existingBlock, src->existingBlock, BLOCK_FIELD_IS_BLOCK); + } + + void __block_dispose_4(struct __block_literal_4 *src) { + // was _Block_destroy + _Block_object_dispose(src->existingBlock, BLOCK_FIELD_IS_BLOCK); + } + + static struct __block_descriptor_4 { + unsigned long int reserved; + unsigned long int Block_size; + void (*copy_helper)(struct __block_literal_4 *dst, struct __block_literal_4 *src); + void (*dispose_helper)(struct __block_literal_4 *); + } __block_descriptor_4 = { + 0, + sizeof(struct __block_literal_4), + __block_copy_4, + __block_dispose_4, + }; + +and where said ``Block`` is used: + +.. code-block:: c + + struct __block_literal_4 _block_literal = { + &_NSConcreteStackBlock, + (1<<25)|(1<<29), <uninitialized>, + __block_invoke_4, + & __block_descriptor_4, + existingBlock, + }; + +Importing ``__attribute__((NSObject))`` variables +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +GCC introduces ``__attribute__((NSObject))`` on structure pointers to mean "this +is an object". This is useful because many low level data structures are +declared as opaque structure pointers, e.g. ``CFStringRef``, ``CFArrayRef``, +etc. When used from C, however, these are still really objects and are the +second case that requires copy and dispose helper functions to be +generated. 
The copy helper functions generated by the compiler should use the +``_Block_object_assign`` runtime helper function and in the dispose helper the +``_Block_object_dispose`` runtime helper function should be called. + +For example, ``Block`` foo in the following: + +.. code-block:: c + + struct Opaque *__attribute__((NSObject)) objectPointer = ...; + ... + void (^foo)(void) = ^{ CFPrint(objectPointer); }; + +would have the following helper functions generated: + +.. code-block:: c + + void __block_copy_foo(struct __block_literal_5 *dst, struct __block_literal_5 *src) { + _Block_object_assign(&dst->objectPointer, src-> objectPointer, BLOCK_FIELD_IS_OBJECT); + } + + void __block_dispose_foo(struct __block_literal_5 *src) { + _Block_object_dispose(src->objectPointer, BLOCK_FIELD_IS_OBJECT); + } + +Imported ``__block`` marked variables +------------------------------------- + +Layout of ``__block`` marked variables +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The compiler must embed variables that are marked ``__block`` in a specialized +structure of the form: + +.. code-block:: c + + struct _block_byref_foo { + void *isa; + struct Block_byref *forwarding; + int flags; //refcount; + int size; + typeof(marked_variable) marked_variable; + }; + +Variables of certain types require helper functions for when ``Block_copy()`` +and ``Block_release()`` are performed upon a referencing ``Block``. At the "C" +level only variables that are of type ``Block`` or ones that have +``__attribute__((NSObject))`` marked require helper functions. In Objective-C +objects require helper functions and in C++ stack based objects require helper +functions. Variables that require helper functions use the form: + +.. 
code-block:: c + + struct _block_byref_foo { + void *isa; + struct _block_byref_foo *forwarding; + int flags; //refcount; + int size; + // helper functions called via Block_copy() and Block_release() + void (*byref_keep)(void *dst, void *src); + void (*byref_dispose)(void *); + typeof(marked_variable) marked_variable; + }; + +The structure is initialized such that: + + a. The ``forwarding`` pointer is set to the beginning of its enclosing + structure. + + b. The ``size`` field is initialized to the total size of the enclosing + structure. + + c. The ``flags`` field is set to either 0 if no helper functions are needed + or (1<<25) if they are. + + d. The helper functions are initialized (if present). + + e. The variable itself is set to its initial value. + + f. The ``isa`` field is set to ``NULL``. + +Access to ``__block`` variables from within its lexical scope +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In order to "move" the variable to the heap upon a ``copy_helper`` operation the +compiler must rewrite access to such a variable to be indirect through the +structures ``forwarding`` pointer. For example: + +.. code-block:: c + + int __block i = 10; + i = 11; + +would be rewritten to be: + +.. code-block:: c + + struct _block_byref_i { + void *isa; + struct _block_byref_i *forwarding; + int flags; //refcount; + int size; + int captured_i; + } i = { NULL, &i, 0, sizeof(struct _block_byref_i), 10 }; + + i.forwarding->captured_i = 11; + +In the case of a ``Block`` reference variable being marked ``__block`` the +helper code generated must use the ``_Block_object_assign`` and +``_Block_object_dispose`` routines supplied by the runtime to make the +copies. For example: + +.. code-block:: c + + __block void (voidBlock)(void) = blockA; + voidBlock = blockB; + +would translate into: + +.. 
code-block:: c + + struct _block_byref_voidBlock { + void *isa; + struct _block_byref_voidBlock *forwarding; + int flags; //refcount; + int size; + void (*byref_keep)(struct _block_byref_voidBlock *dst, struct _block_byref_voidBlock *src); + void (*byref_dispose)(struct _block_byref_voidBlock *); + void (^captured_voidBlock)(void); + }; + + void _block_byref_keep_helper(struct _block_byref_voidBlock *dst, struct _block_byref_voidBlock *src) { + //_Block_copy_assign(&dst->captured_voidBlock, src->captured_voidBlock, 0); + _Block_object_assign(&dst->captured_voidBlock, src->captured_voidBlock, BLOCK_FIELD_IS_BLOCK | BLOCK_BYREF_CALLER); + } + + void _block_byref_dispose_helper(struct _block_byref_voidBlock *param) { + //_Block_destroy(param->captured_voidBlock, 0); + _Block_object_dispose(param->captured_voidBlock, BLOCK_FIELD_IS_BLOCK | BLOCK_BYREF_CALLER); + } + +and: + +.. code-block:: c + + struct _block_byref_voidBlock voidBlock = {( .forwarding=&voidBlock, .flags=(1<<25), .size=sizeof(struct _block_byref_voidBlock), + .byref_keep=_block_byref_keep_helper, .byref_dispose=_block_byref_dispose_helper, + .captured_voidBlock=blockA )}; + + voidBlock.forwarding->captured_voidBlock = blockB; + +Importing ``__block`` variables into ``Blocks`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A ``Block`` that uses a ``__block`` variable in its compound statement body must +import the variable and emit ``copy_helper`` and ``dispose_helper`` helper +functions that, in turn, call back into the runtime to actually copy or release +the ``byref`` data block using the functions ``_Block_object_assign`` and +``_Block_object_dispose``. + +For example: + +.. code-block:: c + + int __block i = 2; + functioncall(^{ i = 10; }); + +would translate to: + +.. 
code-block:: c + + struct _block_byref_i { + void *isa; // set to NULL + struct _block_byref_voidBlock *forwarding; + int flags; //refcount; + int size; + void (*byref_keep)(struct _block_byref_i *dst, struct _block_byref_i *src); + void (*byref_dispose)(struct _block_byref_i *); + int captured_i; + }; + + + struct __block_literal_5 { + void *isa; + int flags; + int reserved; + void (*invoke)(struct __block_literal_5 *); + struct __block_descriptor_5 *descriptor; + struct _block_byref_i *i_holder; + }; + + void __block_invoke_5(struct __block_literal_5 *_block) { + _block->forwarding->captured_i = 10; + } + + void __block_copy_5(struct __block_literal_5 *dst, struct __block_literal_5 *src) { + //_Block_byref_assign_copy(&dst->captured_i, src->captured_i); + _Block_object_assign(&dst->captured_i, src->captured_i, BLOCK_FIELD_IS_BYREF | BLOCK_BYREF_CALLER); + } + + void __block_dispose_5(struct __block_literal_5 *src) { + //_Block_byref_release(src->captured_i); + _Block_object_dispose(src->captured_i, BLOCK_FIELD_IS_BYREF | BLOCK_BYREF_CALLER); + } + + static struct __block_descriptor_5 { + unsigned long int reserved; + unsigned long int Block_size; + void (*copy_helper)(struct __block_literal_5 *dst, struct __block_literal_5 *src); + void (*dispose_helper)(struct __block_literal_5 *); + } __block_descriptor_5 = { 0, sizeof(struct __block_literal_5) __block_copy_5, __block_dispose_5 }; + +and: + +.. 
code-block:: c + + struct _block_byref_i i = {( .forwarding=&i, .flags=0, .size=sizeof(struct _block_byref_i) )}; + struct __block_literal_5 _block_literal = { + &_NSConcreteStackBlock, + (1<<25)|(1<<29), <uninitialized>, + __block_invoke_5, + &__block_descriptor_5, + 2, + }; + +Importing ``__attribute__((NSObject))`` ``__block`` variables +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A ``__block`` variable that is also marked ``__attribute__((NSObject))`` should +have ``byref_keep`` and ``byref_dispose`` helper functions that use +``_Block_object_assign`` and ``_Block_object_dispose``. + +``__block`` escapes +^^^^^^^^^^^^^^^^^^^ + +Because ``Blocks`` referencing ``__block`` variables may have ``Block_copy()`` +performed upon them the underlying storage for the variables may move to the +heap. In Objective-C Garbage Collection Only compilation environments the heap +used is the garbage collected one and no further action is required. Otherwise +the compiler must issue a call to potentially release any heap storage for +``__block`` variables at all escapes or terminations of their scope. The call +should be: + +.. code-block:: c + + _Block_object_dispose(&_block_byref_foo, BLOCK_FIELD_IS_BYREF); + +Nesting +^^^^^^^ + +``Blocks`` may contain ``Block`` literal expressions. Any variables used within +inner blocks are imported into all enclosing ``Block`` scopes even if the +variables are not used. This includes ``const`` imports as well as ``__block`` +variables. + +Objective C Extensions to ``Blocks`` +==================================== + +Importing Objects +----------------- + +Objects should be treated as ``__attribute__((NSObject))`` variables; all +``copy_helper``, ``dispose_helper``, ``byref_keep``, and ``byref_dispose`` +helper functions should use ``_Block_object_assign`` and +``_Block_object_dispose``. There should be no code generated that uses +``*-retain`` or ``*-release`` methods. 
+ +``Blocks`` as Objects +--------------------- + +The compiler will treat ``Blocks`` as objects when synthesizing property setters +and getters, will characterize them as objects when generating garbage +collection strong and weak layout information in the same manner as objects, and +will issue strong and weak write-barrier assignments in the same manner as +objects. + +``__weak __block`` Support +-------------------------- + +Objective-C (and Objective-C++) support the ``__weak`` attribute on ``__block`` +variables. Under normal circumstances the compiler uses the Objective-C runtime +helper support functions ``objc_assign_weak`` and ``objc_read_weak``. Both +should continue to be used for all reads and writes of ``__weak __block`` +variables: + +.. code-block:: c + + objc_read_weak(&block->byref_i->forwarding->i) + +The ``__weak`` variable is stored in a ``_block_byref_foo`` structure and the +``Block`` has copy and dispose helpers for this structure that call: + +.. code-block:: c + + _Block_object_assign(&dest->_block_byref_i, src-> _block_byref_i, BLOCK_FIELD_IS_WEAK | BLOCK_FIELD_IS_BYREF); + +and: + +.. code-block:: c + + _Block_object_dispose(src->_block_byref_i, BLOCK_FIELD_IS_WEAK | BLOCK_FIELD_IS_BYREF); + +In turn, the ``block_byref`` copy support helpers distinguish between whether +the ``__block`` variable is a ``Block`` or not and should either call: + +.. code-block:: c + + _Block_object_assign(&dest->_block_byref_i, src->_block_byref_i, BLOCK_FIELD_IS_WEAK | BLOCK_FIELD_IS_OBJECT | BLOCK_BYREF_CALLER); + +for something declared as an object or: + +.. code-block:: c + + _Block_object_assign(&dest->_block_byref_i, src->_block_byref_i, BLOCK_FIELD_IS_WEAK | BLOCK_FIELD_IS_BLOCK | BLOCK_BYREF_CALLER); + +for something declared as a ``Block``. + +A full example follows: + +.. code-block:: c + + __block __weak id obj = <initialization expression>; + functioncall(^{ [obj somemessage]; }); + +would translate to: + +.. 
code-block:: c + + struct _block_byref_obj { + void *isa; // uninitialized + struct _block_byref_obj *forwarding; + int flags; //refcount; + int size; + void (*byref_keep)(struct _block_byref_i *dst, struct _block_byref_i *src); + void (*byref_dispose)(struct _block_byref_i *); + id captured_obj; + }; + + void _block_byref_obj_keep(struct _block_byref_voidBlock *dst, struct _block_byref_voidBlock *src) { + //_Block_copy_assign(&dst->captured_obj, src->captured_obj, 0); + _Block_object_assign(&dst->captured_obj, src->captured_obj, BLOCK_FIELD_IS_OBJECT | BLOCK_FIELD_IS_WEAK | BLOCK_BYREF_CALLER); + } + + void _block_byref_obj_dispose(struct _block_byref_voidBlock *param) { + //_Block_destroy(param->captured_obj, 0); + _Block_object_dispose(param->captured_obj, BLOCK_FIELD_IS_OBJECT | BLOCK_FIELD_IS_WEAK | BLOCK_BYREF_CALLER); + }; + +for the block ``byref`` part and: + +.. code-block:: c + + struct __block_literal_5 { + void *isa; + int flags; + int reserved; + void (*invoke)(struct __block_literal_5 *); + struct __block_descriptor_5 *descriptor; + struct _block_byref_obj *byref_obj; + }; + + void __block_invoke_5(struct __block_literal_5 *_block) { + [objc_read_weak(&_block->byref_obj->forwarding->captured_obj) somemessage]; + } + + void __block_copy_5(struct __block_literal_5 *dst, struct __block_literal_5 *src) { + //_Block_byref_assign_copy(&dst->byref_obj, src->byref_obj); + _Block_object_assign(&dst->byref_obj, src->byref_obj, BLOCK_FIELD_IS_BYREF | BLOCK_FIELD_IS_WEAK); + } + + void __block_dispose_5(struct __block_literal_5 *src) { + //_Block_byref_release(src->byref_obj); + _Block_object_dispose(src->byref_obj, BLOCK_FIELD_IS_BYREF | BLOCK_FIELD_IS_WEAK); + } + + static struct __block_descriptor_5 { + unsigned long int reserved; + unsigned long int Block_size; + void (*copy_helper)(struct __block_literal_5 *dst, struct __block_literal_5 *src); + void (*dispose_helper)(struct __block_literal_5 *); + } __block_descriptor_5 = { 0, sizeof(struct 
__block_literal_5), __block_copy_5, __block_dispose_5 }; + +and within the compound statement: + +.. code-block:: c + + struct _block_byref_obj obj = {( .forwarding=&obj, .flags=(1<<25), .size=sizeof(struct _block_byref_obj), + .byref_keep=_block_byref_obj_keep, .byref_dispose=_block_byref_obj_dispose, + .captured_obj = <initialization expression> )}; + + struct __block_literal_5 _block_literal = { + &_NSConcreteStackBlock, + (1<<25)|(1<<29), <uninitialized>, + __block_invoke_5, + &__block_descriptor_5, + &obj, // a reference to the on-stack structure containing "captured_obj" + }; + + + functioncall(_block_literal->invoke(&_block_literal)); + +C++ Support +=========== + +Within a block stack based C++ objects are copied into ``const`` copies using +the copy constructor. It is an error if a stack based C++ object is used within +a block if it does not have a copy constructor. In addition both copy and +destroy helper routines must be synthesized for the block to support the +``Block_copy()`` operation, and the flags word is marked with the (1<<26) bit in +addition to the (1<<25) bit. The copy helper should call the constructor using +appropriate offsets of the variable within the supplied stack based block source +and heap based destination for all ``const`` constructed copies, and similarly +should call the destructor in the destroy routine. + +As an example, suppose a C++ class ``FOO`` existed with a copy constructor. +Within a code block a stack version of a ``FOO`` object is declared and used +within a ``Block`` literal expression: + +.. code-block:: c++ + + { + FOO foo; + void (^block)(void) = ^{ printf("%d\n", foo.value()); }; + } + +The compiler would synthesize: + +.. 
code-block:: c++ + + struct __block_literal_10 { + void *isa; + int flags; + int reserved; + void (*invoke)(struct __block_literal_10 *); + struct __block_descriptor_10 *descriptor; + const FOO foo; + }; + + void __block_invoke_10(struct __block_literal_10 *_block) { + printf("%d\n", _block->foo.value()); + } + + void __block_copy_10(struct __block_literal_10 *dst, struct __block_literal_10 *src) { + FOO_ctor(&dst->foo, &src->foo); + } + + void __block_dispose_10(struct __block_literal_10 *src) { + FOO_dtor(&src->foo); + } + + static struct __block_descriptor_10 { + unsigned long int reserved; + unsigned long int Block_size; + void (*copy_helper)(struct __block_literal_10 *dst, struct __block_literal_10 *src); + void (*dispose_helper)(struct __block_literal_10 *); + } __block_descriptor_10 = { 0, sizeof(struct __block_literal_10), __block_copy_10, __block_dispose_10 }; + +and the code would be: + +.. code-block:: c++ + + { + FOO foo; + comp_ctor(&foo); // default constructor + struct __block_literal_10 _block_literal = { + &_NSConcreteStackBlock, + (1<<25)|(1<<26)|(1<<29), <uninitialized>, + __block_invoke_10, + &__block_descriptor_10, + }; + comp_ctor(&_block_literal->foo, &foo); // const copy into stack version + struct __block_literal_10 &block = &_block_literal; // assign literal to block variable + block->invoke(block); // invoke block + comp_dtor(&_block_literal->foo); // destroy stack version of const block copy + comp_dtor(&foo); // destroy original version + } + + +C++ objects stored in ``__block`` storage start out on the stack in a +``block_byref`` data structure as do other variables. Such objects (if not +``const`` objects) must support a regular copy constructor. The ``block_byref`` +data structure will have copy and destroy helper routines synthesized by the +compiler. 
The copy helper will have code created to perform the copy +constructor based on the initial stack ``block_byref`` data structure, and will +also set the (1<<26) bit in addition to the (1<<25) bit. The destroy helper +will have code to do the destructor on the object stored within the supplied +``block_byref`` heap data structure. For example, + +.. code-block:: c++ + + __block FOO blockStorageFoo; + +requires the normal constructor for the embedded ``blockStorageFoo`` object: + +.. code-block:: c++ + + FOO_ctor(& _block_byref_blockStorageFoo->blockStorageFoo); + +and at scope termination the destructor: + +.. code-block:: c++ + + FOO_dtor(& _block_byref_blockStorageFoo->blockStorageFoo); + +Note that the forwarding indirection is *NOT* used. + +The compiler would need to generate (if used from a block literal) the following +copy/dispose helpers: + +.. code-block:: c++ + + void _block_byref_obj_keep(struct _block_byref_blockStorageFoo *dst, struct _block_byref_blockStorageFoo *src) { + FOO_ctor(&dst->blockStorageFoo, &src->blockStorageFoo); + } + + void _block_byref_obj_dispose(struct _block_byref_blockStorageFoo *src) { + FOO_dtor(&src->blockStorageFoo); + } + +for the appropriately named constructor and destructor for the class/struct +``FOO``. + +To support member variable and function access the compiler will synthesize a +``const`` pointer to a block version of the ``this`` pointer. + +.. _RuntimeHelperFunctions: + +Runtime Helper Functions +======================== + +The runtime helper functions are described in +``/usr/local/include/Block_private.h``. To summarize their use, a ``Block`` +requires copy/dispose helpers if it imports any block variables, ``__block`` +storage variables, ``__attribute__((NSObject))`` variables, or C++ ``const`` +copied objects with constructor/destructors. The (1<<26) bit is set and +functions are generated. + +The block copy helper function should, for each of the variables of the type +mentioned above, call: + +.. 
code-block:: c + + _Block_object_assign(&dst->target, src->target, BLOCK_FIELD_<appropo>); + +in the copy helper and: + +.. code-block:: c + + _Block_object_dispose(src->target, BLOCK_FIELD_<appropo>); + +in the dispose helper where ``<appropo>`` is: + +.. code-block:: c + + enum { + BLOCK_FIELD_IS_OBJECT = 3, // id, NSObject, __attribute__((NSObject)), block, ... + BLOCK_FIELD_IS_BLOCK = 7, // a block variable + BLOCK_FIELD_IS_BYREF = 8, // the on stack structure holding the __block variable + + BLOCK_FIELD_IS_WEAK = 16, // declared __weak + + BLOCK_BYREF_CALLER = 128, // called from byref copy/dispose helpers + }; + +and of course the constructors/destructors for ``const`` copied C++ objects. + +The ``block_byref`` data structure similarly requires copy/dispose helpers for +block variables, ``__attribute__((NSObject))`` variables, or C++ ``const`` +copied objects with constructor/destructors, and again the (1<<26) bit is set +and functions are generated in the same manner. + +Under ObjC we allow ``__weak`` as an attribute on ``__block`` variables, and +this causes the addition of ``BLOCK_FIELD_IS_WEAK`` orred onto the +``BLOCK_FIELD_IS_BYREF`` flag when copying the ``block_byref`` structure in the +``Block`` copy helper, and onto the ``BLOCK_FIELD_<appropo>`` field within the +``block_byref`` copy/dispose helper calls. + +The prototypes, and summary, of the helper functions are: + +.. code-block:: c + + /* Certain field types require runtime assistance when being copied to the + heap. The following function is used to copy fields of types: blocks, + pointers to byref structures, and objects (including + __attribute__((NSObject)) pointers). BLOCK_FIELD_IS_WEAK is orthogonal to + the other choices which are mutually exclusive. Only in a Block copy + helper will one see BLOCK_FIELD_IS_BYREF. 
+ */ + void _Block_object_assign(void *destAddr, const void *object, const int flags); + + /* Similarly a compiler generated dispose helper needs to call back for each + field of the byref data structure. (Currently the implementation only + packs one field into the byref structure but in principle there could be + more). The same flags used in the copy helper should be used for each + call generated to this function: + */ + void _Block_object_dispose(const void *object, const int flags); + +Copyright +========= + +Copyright 2008-2010 Apple, Inc. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/docs/Block-ABI-Apple.txt b/docs/Block-ABI-Apple.txt index 917059b..94a4d18 100644 --- a/docs/Block-ABI-Apple.txt +++ b/docs/Block-ABI-Apple.txt @@ -1,669 +1 @@ -Block Implementation Specification - -Copyright 2008-2010 Apple, Inc. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - -0. History - -2008/7/14 - created -2008/8/21 - revised, C++ -2008/9/24 - add NULL isa field to __block storage -2008/10/1 - revise block layout to use a static descriptor structure -2008/10/6 - revise block layout to use an unsigned long int flags -2008/10/28 - specify use of _Block_object_assign/dispose for all "Object" types in helper functions -2008/10/30 - revise new layout to have invoke function in same place -2008/10/30 - add __weak support - -2010/3/16 - rev for stret return, signature field -2010/4/6 - improved wording - -This document describes the Apple ABI implementation specification of Blocks. - -The first shipping version of this ABI is found in Mac OS X 10.6, and shall be referred to as 10.6.ABI. As of 2010/3/16, the following describes the ABI contract with the runtime and the compiler, and, as necessary, will be referred to as ABI.2010.3.16. 
- -Since the Apple ABI references symbols from other elements of the system, any attempt to use this ABI on systems prior to SnowLeopard is undefined. - -1. High Level - -The ABI of blocks consist of their layout and the runtime functions required by the compiler. -A Block consists of a structure of the following form: - -struct Block_literal_1 { - void *isa; // initialized to &_NSConcreteStackBlock or &_NSConcreteGlobalBlock - int flags; - int reserved; - void (*invoke)(void *, ...); - struct Block_descriptor_1 { - unsigned long int reserved; // NULL - unsigned long int size; // sizeof(struct Block_literal_1) - // optional helper functions - void (*copy_helper)(void *dst, void *src); // IFF (1<<25) - void (*dispose_helper)(void *src); // IFF (1<<25) - // required ABI.2010.3.16 - const char *signature; // IFF (1<<30) - } *descriptor; - // imported variables -}; - -The following flags bits are in use thusly for a possible ABI.2010.3.16: - -enum { - BLOCK_HAS_COPY_DISPOSE = (1 << 25), - BLOCK_HAS_CTOR = (1 << 26), // helpers have C++ code - BLOCK_IS_GLOBAL = (1 << 28), - BLOCK_HAS_STRET = (1 << 29), // IFF BLOCK_HAS_SIGNATURE - BLOCK_HAS_SIGNATURE = (1 << 30), -}; - -In 10.6.ABI the (1<<29) was usually set and was always ignored by the runtime - it had been a transitional marker that did not get deleted after the transition. This bit is now paired with (1<<30), and represented as the pair (3<<30), for the following combinations of valid bit settings, and their meanings. - -switch (flags & (3<<29)) { - case (0<<29): 10.6.ABI, no signature field available - case (1<<29): 10.6.ABI, no signature field available - case (2<<29): ABI.2010.3.16, regular calling convention, presence of signature field - case (3<<29): ABI.2010.3.16, stret calling convention, presence of signature field, -} - -The signature field is not always populated. - -The following discussions are presented as 10.6.ABI otherwise. 
- -Block literals may occur within functions where the structure is created in stack local memory. They may also appear as initialization expressions for Block variables of global or static local variables. - -When a Block literal expression is evaluated the stack based structure is initialized as follows: - -1) static descriptor structure is declared and initialized as follows: -1a) the invoke function pointer is set to a function that takes the Block structure as its first argument and the rest of the arguments (if any) to the Block and executes the Block compound statement. -1b) the size field is set to the size of the following Block literal structure. -1c) the copy_helper and dispose_helper function pointers are set to respective helper functions if they are required by the Block literal -2) a stack (or global) Block literal data structure is created and initialized as follows: -2a) the isa field is set to the address of the external _NSConcreteStackBlock, which is a block of uninitialized memory supplied in libSystem, or _NSConcreteGlobalBlock if this is a static or file level block literal. -2) The flags field is set to zero unless there are variables imported into the block that need helper functions for program level Block_copy() and Block_release() operations, in which case the (1<<25) flags bit is set. 
- - -As an example, the Block literal expression - ^ { printf("hello world\n"); } -would cause to be created on a 32-bit system: - -struct __block_literal_1 { - void *isa; - int flags; - int reserved; - void (*invoke)(struct __block_literal_1 *); - struct __block_descriptor_1 *descriptor; -}; - -void __block_invoke_1(struct __block_literal_1 *_block) { - printf("hello world\n"); -} - -static struct __block_descriptor_1 { - unsigned long int reserved; - unsigned long int Block_size; -} __block_descriptor_1 = { 0, sizeof(struct __block_literal_1), __block_invoke_1 }; - -and where the block literal appeared - - struct __block_literal_1 _block_literal = { - &_NSConcreteStackBlock, - (1<<29), <uninitialized>, - __block_invoke_1, - &__block_descriptor_1 - }; - -Blocks import other Block references, const copies of other variables, and variables marked __block. In Objective-C variables may additionally be objects. - -When a Block literal expression used as the initial value of a global or static local variable it is initialized as follows: - struct __block_literal_1 __block_literal_1 = { - &_NSConcreteGlobalBlock, - (1<<28)|(1<<29), <uninitialized>, - __block_invoke_1, - &__block_descriptor_1 - }; -that is, a different address is provided as the first value and a particular (1<<28) bit is set in the flags field, and otherwise it is the same as for stack based Block literals. This is an optimization that can be used for any Block literal that imports no const or __block storage variables. - - -2. Imported Variables - -Variables of "auto" storage class are imported as const copies. Variables of "__block" storage class are imported as a pointer to an enclosing data structure. Global variables are simply referenced and not considered as imported. - -2.1 Imported const copy variables - -Automatic storage variables not marked with __block are imported as const copies. - -The simplest example is that of importing a variable of type int. 
- - int x = 10; - void (^vv)(void) = ^{ printf("x is %d\n", x); } - x = 11; - vv(); - -would be compiled - -struct __block_literal_2 { - void *isa; - int flags; - int reserved; - void (*invoke)(struct __block_literal_2 *); - struct __block_descriptor_2 *descriptor; - const int x; -}; - -void __block_invoke_2(struct __block_literal_2 *_block) { - printf("x is %d\n", _block->x); -} - -static struct __block_descriptor_2 { - unsigned long int reserved; - unsigned long int Block_size; -} __block_descriptor_2 = { 0, sizeof(struct __block_literal_2) }; - -and - - struct __block_literal_2 __block_literal_2 = { - &_NSConcreteStackBlock, - (1<<29), <uninitialized>, - __block_invoke_2, - &__block_descriptor_2, - x - }; - -In summary, scalars, structures, unions, and function pointers are generally imported as const copies with no need for helper functions. - -2.2 Imported const copy of Block reference - -The first case where copy and dispose helper functions are required is for the case of when a block itself is imported. In this case both a copy_helper function and a dispose_helper function are needed. The copy_helper function is passed both the existing stack based pointer and the pointer to the new heap version and should call back into the runtime to actually do the copy operation on the imported fields within the block. The runtime functions are all described in Section 5.0 Runtime Helper Functions. 
- -An example: - - void (^existingBlock)(void) = ...; - void (^vv)(void) = ^{ existingBlock(); } - vv(); - -struct __block_literal_3 { - ...; // existing block -}; - -struct __block_literal_4 { - void *isa; - int flags; - int reserved; - void (*invoke)(struct __block_literal_4 *); - struct __block_literal_3 *const existingBlock; -}; - -void __block_invoke_4(struct __block_literal_2 *_block) { - __block->existingBlock->invoke(__block->existingBlock); -} - -void __block_copy_4(struct __block_literal_4 *dst, struct __block_literal_4 *src) { - //_Block_copy_assign(&dst->existingBlock, src->existingBlock, 0); - _Block_object_assign(&dst->existingBlock, src->existingBlock, BLOCK_FIELD_IS_BLOCK); -} - -void __block_dispose_4(struct __block_literal_4 *src) { - // was _Block_destroy - _Block_object_dispose(src->existingBlock, BLOCK_FIELD_IS_BLOCK); -} - -static struct __block_descriptor_4 { - unsigned long int reserved; - unsigned long int Block_size; - void (*copy_helper)(struct __block_literal_4 *dst, struct __block_literal_4 *src); - void (*dispose_helper)(struct __block_literal_4 *); -} __block_descriptor_4 = { - 0, - sizeof(struct __block_literal_4), - __block_copy_4, - __block_dispose_4, -}; - -and where it is used - - struct __block_literal_4 _block_literal = { - &_NSConcreteStackBlock, - (1<<25)|(1<<29), <uninitialized> - __block_invoke_4, - & __block_descriptor_4 - existingBlock, - }; - -2.2.1 Importing __attribute__((NSObject)) variables. - -GCC introduces __attribute__((NSObject)) on structure pointers to mean "this is an object". This is useful because many low level data structures are declared as opaque structure pointers, e.g. CFStringRef, CFArrayRef, etc. When used from C, however, these are still really objects and are the second case where that requires copy and dispose helper functions to be generated. 
The copy helper functions generated by the compiler should use the _Block_object_assign runtime helper function and in the dispose helper the _Block_object_dispose runtime helper function should be called. - -For example, block xyzzy in the following - - struct Opaque *__attribute__((NSObject)) objectPointer = ...; - ... - void (^xyzzy)(void) = ^{ CFPrint(objectPointer); }; - -would have helper functions - -void __block_copy_xyzzy(struct __block_literal_5 *dst, struct __block_literal_5 *src) { - _Block_object_assign(&dst->objectPointer, src-> objectPointer, BLOCK_FIELD_IS_OBJECT); -} - -void __block_dispose_xyzzy(struct __block_literal_5 *src) { - _Block_object_dispose(src->objectPointer, BLOCK_FIELD_IS_OBJECT); -} - -generated. - - -2.3 Imported __block marked variables. - -2.3.1 Layout of __block marked variables - -The compiler must embed variables that are marked __block in a specialized structure of the form: - -struct _block_byref_xxxx { - void *isa; - struct Block_byref *forwarding; - int flags; //refcount; - int size; - typeof(marked_variable) marked_variable; -}; - -Variables of certain types require helper functions for when Block_copy() and Block_release() are performed upon a referencing Block. At the "C" level only variables that are of type Block or ones that have __attribute__((NSObject)) marked require helper functions. In Objective-C objects require helper functions and in C++ stack based objects require helper functions. 
Variables that require helper functions use the form: - -struct _block_byref_xxxx { - void *isa; - struct _block_byref_xxxx *forwarding; - int flags; //refcount; - int size; - // helper functions called via Block_copy() and Block_release() - void (*byref_keep)(void *dst, void *src); - void (*byref_dispose)(void *); - typeof(marked_variable) marked_variable; -}; - -The structure is initialized such that - a) the forwarding pointer is set to the beginning of its enclosing structure, - b) the size field is initialized to the total size of the enclosing structure, - c) the flags field is set to either 0 if no helper functions are needed or (1<<25) if they are, - d) the helper functions are initialized (if present) - e) the variable itself is set to its initial value. - f) the isa field is set to NULL - -2.3.2 Access to __block variables from within its lexical scope. - -In order to "move" the variable to the heap upon a copy_helper operation the compiler must rewrite access to such a variable to be indirect through the structures forwarding pointer. For example: - - int __block i = 10; - i = 11; - -would be rewritten to be: - - struct _block_byref_i { - void *isa; - struct _block_byref_i *forwarding; - int flags; //refcount; - int size; - int captured_i; - } i = { NULL, &i, 0, sizeof(struct _block_byref_i), 10 }; - - i.forwarding->captured_i = 11; - -In the case of a Block reference variable being marked __block the helper code generated must use the _Block_object_assign and _Block_object_dispose routines supplied by the runtime to make the copies. 
For example: - - __block void (voidBlock)(void) = blockA; - voidBlock = blockB; - -would translate into - -struct _block_byref_voidBlock { - void *isa; - struct _block_byref_voidBlock *forwarding; - int flags; //refcount; - int size; - void (*byref_keep)(struct _block_byref_voidBlock *dst, struct _block_byref_voidBlock *src); - void (*byref_dispose)(struct _block_byref_voidBlock *); - void (^captured_voidBlock)(void); -}; - -void _block_byref_keep_helper(struct _block_byref_voidBlock *dst, struct _block_byref_voidBlock *src) { - //_Block_copy_assign(&dst->captured_voidBlock, src->captured_voidBlock, 0); - _Block_object_assign(&dst->captured_voidBlock, src->captured_voidBlock, BLOCK_FIELD_IS_BLOCK | BLOCK_BYREF_CALLER); -} - -void _block_byref_dispose_helper(struct _block_byref_voidBlock *param) { - //_Block_destroy(param->captured_voidBlock, 0); - _Block_object_dispose(param->captured_voidBlock, BLOCK_FIELD_IS_BLOCK | BLOCK_BYREF_CALLER)} - -and - struct _block_byref_voidBlock voidBlock = {( .forwarding=&voidBlock, .flags=(1<<25), .size=sizeof(struct _block_byref_voidBlock *), - .byref_keep=_block_byref_keep_helper, .byref_dispose=_block_byref_dispose_helper, - .captured_voidBlock=blockA )}; - - voidBlock.forwarding->captured_voidBlock = blockB; - - -2.3.3 Importing __block variables into Blocks - -A Block that uses a __block variable in its compound statement body must import the variable and emit copy_helper and dispose_helper helper functions that, in turn, call back into the runtime to actually copy or release the byref data block using the functions _Block_object_assign and _Block_object_dispose. 
- -For example: - - int __block i = 2; - functioncall(^{ i = 10; }); - -would translate to - -struct _block_byref_i { - void *isa; // set to NULL - struct _block_byref_voidBlock *forwarding; - int flags; //refcount; - int size; - void (*byref_keep)(struct _block_byref_i *dst, struct _block_byref_i *src); - void (*byref_dispose)(struct _block_byref_i *); - int captured_i; -}; - - -struct __block_literal_5 { - void *isa; - int flags; - int reserved; - void (*invoke)(struct __block_literal_5 *); - struct __block_descriptor_5 *descriptor; - struct _block_byref_i *i_holder; -}; - -void __block_invoke_5(struct __block_literal_5 *_block) { - _block->forwarding->captured_i = 10; -} - -void __block_copy_5(struct __block_literal_5 *dst, struct __block_literal_5 *src) { - //_Block_byref_assign_copy(&dst->captured_i, src->captured_i); - _Block_object_assign(&dst->captured_i, src->captured_i, BLOCK_FIELD_IS_BYREF | BLOCK_BYREF_CALLER); -} - -void __block_dispose_5(struct __block_literal_5 *src) { - //_Block_byref_release(src->captured_i); - _Block_object_dispose(src->captured_i, BLOCK_FIELD_IS_BYREF | BLOCK_BYREF_CALLER); -} - -static struct __block_descriptor_5 { - unsigned long int reserved; - unsigned long int Block_size; - void (*copy_helper)(struct __block_literal_5 *dst, struct __block_literal_5 *src); - void (*dispose_helper)(struct __block_literal_5 *); -} __block_descriptor_5 = { 0, sizeof(struct __block_literal_5) __block_copy_5, __block_dispose_5 }; - -and - - struct _block_byref_i i = {( .forwarding=&i, .flags=0, .size=sizeof(struct _block_byref_i) )}; - struct __block_literal_5 _block_literal = { - &_NSConcreteStackBlock, - (1<<25)|(1<<29), <uninitialized>, - __block_invoke_5, - &__block_descriptor_5, - 2, - }; - -2.3.4 Importing __attribute__((NSObject)) __block variables - -A __block variable that is also marked __attribute__((NSObject)) should have byref_keep and byref_dispose helper functions that use _Block_object_assign and _Block_object_dispose. 
- -2.3.5 __block escapes - -Because Blocks referencing __block variables may have Block_copy() performed upon them the underlying storage for the variables may move to the heap. In Objective-C Garbage Collection Only compilation environments the heap used is the garbage collected one and no further action is required. Otherwise the compiler must issue a call to potentially release any heap storage for __block variables at all escapes or terminations of their scope. The call should be: - - _Block_object_dispose(&_block_byref_xxx, BLOCK_FIELD_IS_BYREF); - - -2.3.6 Nesting - -Blocks may contain Block literal expressions. Any variables used within inner blocks are imported into all enclosing Block scopes even if the variables are not used. This includes const imports as well as __block variables. - -3. Objective C Extensions to Blocks - -3.1 Importing Objects - -Objects should be treated as __attribute__((NSObject)) variables; all copy_helper, dispose_helper, byref_keep, and byref_dispose helper functions should use _Block_object_assign and _Block_object_dispose. There should be no code generated that uses -retain or -release methods. - - -3.2 Blocks as Objects - -The compiler will treat Blocks as objects when synthesizing property setters and getters, will characterize them as objects when generating garbage collection strong and weak layout information in the same manner as objects, and will issue strong and weak write-barrier assignments in the same manner as objects. - -3.3 __weak __block Support - -Objective-C (and Objective-C++) support the __weak attribute on __block variables. Under normal circumstances the compiler uses the Objective-C runtime helper support functions objc_assign_weak and objc_read_weak. 
Both should continue to be used for all reads and writes of __weak __block variables: - objc_read_weak(&block->byref_i->forwarding->i) - -The __weak variable is stored in a _block_byref_xxxx structure and the Block has copy and dispose helpers for this structure that call: - _Block_object_assign(&dest->_block_byref_i, src-> _block_byref_i, BLOCK_FIELD_IS_WEAK | BLOCK_FIELD_IS_BYREF); -and - _Block_object_dispose(src->_block_byref_i, BLOCK_FIELD_IS_WEAK | BLOCK_FIELD_IS_BYREF); - - -In turn, the block_byref copy support helpers distinguish between whether the __block variable is a Block or not and should either call: - _Block_object_assign(&dest->_block_byref_i, src->_block_byref_i, BLOCK_FIELD_IS_WEAK | BLOCK_FIELD_IS_OBJECT | BLOCK_BYREF_CALLER); -for something declared as an object or - _Block_object_assign(&dest->_block_byref_i, src->_block_byref_i, BLOCK_FIELD_IS_WEAK | BLOCK_FIELD_IS_BLOCK | BLOCK_BYREF_CALLER); -for something declared as a Block. - -A full example follows: - - - __block __weak id obj = <initialization expression>; - functioncall(^{ [obj somemessage]; }); - -would translate to - -struct _block_byref_obj { - void *isa; // uninitialized - struct _block_byref_obj *forwarding; - int flags; //refcount; - int size; - void (*byref_keep)(struct _block_byref_i *dst, struct _block_byref_i *src); - void (*byref_dispose)(struct _block_byref_i *); - id captured_obj; -}; - -void _block_byref_obj_keep(struct _block_byref_voidBlock *dst, struct _block_byref_voidBlock *src) { - //_Block_copy_assign(&dst->captured_obj, src->captured_obj, 0); - _Block_object_assign(&dst->captured_obj, src->captured_obj, BLOCK_FIELD_IS_OBJECT | BLOCK_FIELD_IS_WEAK | BLOCK_BYREF_CALLER); -} - -void _block_byref_obj_dispose(struct _block_byref_voidBlock *param) { - //_Block_destroy(param->captured_obj, 0); - _Block_object_dispose(param->captured_obj, BLOCK_FIELD_IS_OBJECT | BLOCK_FIELD_IS_WEAK | BLOCK_BYREF_CALLER); -}; - -for the block byref part and - -struct __block_literal_5 { 
- void *isa; - int flags; - int reserved; - void (*invoke)(struct __block_literal_5 *); - struct __block_descriptor_5 *descriptor; - struct _block_byref_obj *byref_obj; -}; - -void __block_invoke_5(struct __block_literal_5 *_block) { - [objc_read_weak(&_block->byref_obj->forwarding->captured_obj) somemessage]; -} - -void __block_copy_5(struct __block_literal_5 *dst, struct __block_literal_5 *src) { - //_Block_byref_assign_copy(&dst->byref_obj, src->byref_obj); - _Block_object_assign(&dst->byref_obj, src->byref_obj, BLOCK_FIELD_IS_BYREF | BLOCK_FIELD_IS_WEAK); -} - -void __block_dispose_5(struct __block_literal_5 *src) { - //_Block_byref_release(src->byref_obj); - _Block_object_dispose(src->byref_obj, BLOCK_FIELD_IS_BYREF | BLOCK_FIELD_IS_WEAK); -} - -static struct __block_descriptor_5 { - unsigned long int reserved; - unsigned long int Block_size; - void (*copy_helper)(struct __block_literal_5 *dst, struct __block_literal_5 *src); - void (*dispose_helper)(struct __block_literal_5 *); -} __block_descriptor_5 = { 0, sizeof(struct __block_literal_5), __block_copy_5, __block_dispose_5 }; - -and within the compound statement: - - struct _block_byref_obj obj = {( .forwarding=&obj, .flags=(1<<25), .size=sizeof(struct _block_byref_obj), - .byref_keep=_block_byref_obj_keep, .byref_dispose=_block_byref_obj_dispose, - .captured_obj = <initialization expression> )}; - - struct __block_literal_5 _block_literal = { - &_NSConcreteStackBlock, - (1<<25)|(1<<29), <uninitialized>, - __block_invoke_5, - &__block_descriptor_5, - &obj, // a reference to the on-stack structure containing "captured_obj" - }; - - - functioncall(_block_literal->invoke(&_block_literal)); - - -4.0 C++ Support - -Within a block stack based C++ objects are copied into const copies using the copy constructor. It is an error if a stack based C++ object is used within a block if it does not have a copy constructor. 
In addition both copy and destroy helper routines must be synthesized for the block to support the Block_copy() operation, and the flags work marked with the (1<<26) bit in addition to the (1<<25) bit. The copy helper should call the constructor using appropriate offsets of the variable within the supplied stack based block source and heap based destination for all const constructed copies, and similarly should call the destructor in the destroy routine. - -As an example, suppose a C++ class FOO existed with a copy constructor. Within a code block a stack version of a FOO object is declared and used within a Block literal expression: - -{ - FOO foo; - void (^block)(void) = ^{ printf("%d\n", foo.value()); }; -} - -The compiler would synthesize - -struct __block_literal_10 { - void *isa; - int flags; - int reserved; - void (*invoke)(struct __block_literal_10 *); - struct __block_descriptor_10 *descriptor; - const FOO foo; -}; - -void __block_invoke_10(struct __block_literal_10 *_block) { - printf("%d\n", _block->foo.value()); -} - -void __block_literal_10(struct __block_literal_10 *dst, struct __block_literal_10 *src) { - FOO_ctor(&dst->foo, &src->foo); -} - -void __block_dispose_10(struct __block_literal_10 *src) { - FOO_dtor(&src->foo); -} - -static struct __block_descriptor_10 { - unsigned long int reserved; - unsigned long int Block_size; - void (*copy_helper)(struct __block_literal_10 *dst, struct __block_literal_10 *src); - void (*dispose_helper)(struct __block_literal_10 *); -} __block_descriptor_10 = { 0, sizeof(struct __block_literal_10), __block_copy_10, __block_dispose_10 }; - -and the code would be: -{ - FOO foo; - comp_ctor(&foo); // default constructor - struct __block_literal_10 _block_literal = { - &_NSConcreteStackBlock, - (1<<25)|(1<<26)|(1<<29), <uninitialized>, - __block_invoke_10, - &__block_descriptor_10, - }; - comp_ctor(&_block_literal->foo, &foo); // const copy into stack version - struct __block_literal_10 &block = &_block_literal; // assign 
literal to block variable - block->invoke(block); // invoke block - comp_dtor(&_block_literal->foo); // destroy stack version of const block copy - comp_dtor(&foo); // destroy original version -} - - -C++ objects stored in __block storage start out on the stack in a block_byref data structure as do other variables. Such objects (if not const objects) must support a regular copy constructor. The block_byref data structure will have copy and destroy helper routines synthesized by the compiler. The copy helper will have code created to perform the copy constructor based on the initial stack block_byref data structure, and will also set the (1<<26) bit in addition to the (1<<25) bit. The destroy helper will have code to do the destructor on the object stored within the supplied block_byref heap data structure. For example, - - __block FOO blockStorageFoo; - -requires the normal constructor for the embedded blockStorageFoo object - - FOO_ctor(& _block_byref_blockStorageFoo->blockStorageFoo); - -and at scope termination the destructor: - - FOO_dtor(& _block_byref_blockStorageFoo->blockStorageFoo); - -Note that the forwarding indirection is NOT used. - -The compiler would need to generate (if used from a block literal) the following copy/dispose helpers: - -void _block_byref_obj_keep(struct _block_byref_blockStorageFoo *dst, struct _block_byref_blockStorageFoo *src) { - FOO_ctor(&dst->blockStorageFoo, &src->blockStorageFoo); -} - -void _block_byref_obj_dispose(struct _block_byref_blockStorageFoo *src) { - FOO_dtor(&src->blockStorageFoo); -} - -for the appropriately named constructor and destructor for the class/struct FOO. - -To support member variable and function access the compiler will synthesize a const pointer to a block version of the "this" pointer. - -5.0 Runtime Helper Functions - -The runtime helper functions are described in /usr/local/include/Block_private.h. 
To summarize their use, a block requires copy/dispose helpers if it imports any block variables, __block storage variables, __attribute__((NSObject)) variables, or C++ const copied objects with constructor/destructors. The (1<<26) bit is set and functions are generated. - -The block copy helper function should, for each of the variables of the type mentioned above, call - _Block_object_assign(&dst->target, src->target, BLOCK_FIELD_<appropo>); -in the copy helper and - _Block_object_dispose(->target, BLOCK_FIELD_<appropo>); -in the dispose helper where - <appropo> is - -enum { - BLOCK_FIELD_IS_OBJECT = 3, // id, NSObject, __attribute__((NSObject)), block, ... - BLOCK_FIELD_IS_BLOCK = 7, // a block variable - BLOCK_FIELD_IS_BYREF = 8, // the on stack structure holding the __block variable - - BLOCK_FIELD_IS_WEAK = 16, // declared __weak - - BLOCK_BYREF_CALLER = 128, // called from byref copy/dispose helpers -}; - -and of course the CTORs/DTORs for const copied C++ objects. - -The block_byref data structure similarly requires copy/dispose helpers for block variables, __attribute__((NSObject)) variables, or C++ const copied objects with constructor/destructors, and again the (1<<26) bit is set and functions are generated in the same manner. - -Under ObjC we allow __weak as an attribute on __block variables, and this causes the addition of BLOCK_FIELD_IS_WEAK orred onto the BLOCK_FIELD_IS_BYREF flag when copying the block_byref structure in the block copy helper, and onto the BLOCK_FIELD_<appropo> field within the block_byref copy/dispose helper calls. - -The prototypes, and summary, of the helper functions are - -/* Certain field types require runtime assistance when being copied to the heap. The following function is used - to copy fields of types: blocks, pointers to byref structures, and objects (including __attribute__((NSObject)) pointers. - BLOCK_FIELD_IS_WEAK is orthogonal to the other choices which are mutually exclusive. 
- Only in a Block copy helper will one see BLOCK_FIELD_IS_BYREF. - */ -void _Block_object_assign(void *destAddr, const void *object, const int flags); - -/* Similarly a compiler generated dispose helper needs to call back for each field of the byref data structure. - (Currently the implementation only packs one field into the byref structure but in principle there could be more). - The same flags used in the copy helper should be used for each call generated to this function: - */ -void _Block_object_dispose(const void *object, const int flags); +*NOTE* This document has moved to http://clang.llvm.org/docs/Block-ABI-Apple.html. diff --git a/docs/BlockLanguageSpec.rst b/docs/BlockLanguageSpec.rst new file mode 100644 index 0000000..3632d56 --- /dev/null +++ b/docs/BlockLanguageSpec.rst @@ -0,0 +1,361 @@ + +.. role:: block-term + +================================= +Language Specification for Blocks +================================= + +.. contents:: + :local: + +Revisions +========= + +- 2008/2/25 --- created +- 2008/7/28 --- revised, ``__block`` syntax +- 2008/8/13 --- revised, Block globals +- 2008/8/21 --- revised, C++ elaboration +- 2008/11/1 --- revised, ``__weak`` support +- 2009/1/12 --- revised, explicit return types +- 2009/2/10 --- revised, ``__block`` objects need retain + +Overview +======== + +A new derived type is introduced to C and, by extension, Objective-C, +C++, and Objective-C++. + +The Block Type +============== + +Like function types, the :block-term:`Block type` is a pair consisting +of a result value type and a list of parameter types very similar to a +function type. Blocks are intended to be used much like functions with +the key distinction being that in addition to executable code they +also contain various variable bindings to automatic (stack) or managed +(heap) memory. + +The abstract declarator, + +.. 
code-block:: c + + int (^)(char, float) + +describes a reference to a Block that, when invoked, takes two +parameters, the first of type char and the second of type float, and +returns a value of type int. The Block referenced is of opaque data +that may reside in automatic (stack) memory, global memory, or heap +memory. + +Block Variable Declarations +=========================== + +A :block-term:`variable with Block type` is declared using function +pointer style notation substituting ``^`` for ``*``. The following are +valid Block variable declarations: + +.. code-block:: c + + void (^blockReturningVoidWithVoidArgument)(void); + int (^blockReturningIntWithIntAndCharArguments)(int, char); + void (^arrayOfTenBlocksReturningVoidWithIntArgument[10])(int); + +Variadic ``...`` arguments are supported. [variadic.c] A Block that +takes no arguments must specify void in the argument list [voidarg.c]. +An empty parameter list does not represent, as K&R provide, an +unspecified argument list. Note: both gcc and clang support K&R style +as a convenience. + +A Block reference may be cast to a pointer of arbitrary type and vice +versa. [cast.c] A Block reference may not be dereferenced via the +pointer dereference operator ``*``, and thus a Block's size may not be +computed at compile time. [sizeof.c] + +Block Literal Expressions +========================= + +A :block-term:`Block literal expression` produces a reference to a +Block. It is introduced by the use of the ``^`` token as a unary +operator. + +.. code-block:: c + + Block_literal_expression ::= ^ block_decl compound_statement_body + block_decl ::= + block_decl ::= parameter_list + block_decl ::= type_expression + +where type expression is extended to allow ``^`` as a Block reference +(pointer) where ``*`` is allowed as a function reference (pointer). + +The following Block literal: + +.. 
code-block:: c + + ^ void (void) { printf("hello world\n"); } + +produces a reference to a Block with no arguments with no return value. + +The return type is optional and is inferred from the return +statements. If the return statements return a value, they all must +return a value of the same type. If there is no value returned the +inferred type of the Block is void; otherwise it is the type of the +return statement value. + +If the return type is omitted and the argument list is ``( void )``, +the ``( void )`` argument list may also be omitted. + +So: + +.. code-block:: c + + ^ ( void ) { printf("hello world\n"); } + +and: + +.. code-block:: c + + ^ { printf("hello world\n"); } + +are exactly equivalent constructs for the same expression. + +The type_expression extends C expression parsing to accommodate Block +reference declarations as it accommodates function pointer +declarations. + +Given: + +.. code-block:: c + + typedef int (*pointerToFunctionThatReturnsIntWithCharArg)(char); + pointerToFunctionThatReturnsIntWithCharArg functionPointer; + ^ pointerToFunctionThatReturnsIntWithCharArg (float x) { return functionPointer; } + +and: + +.. code-block:: c + + ^ int ((*)(float x))(char) { return functionPointer; } + +are equivalent expressions, as is: + +.. code-block:: c + + ^(float x) { return functionPointer; } + +[returnfunctionptr.c] + +The compound statement body establishes a new lexical scope within +that of its parent. Variables used within the scope of the compound +statement are bound to the Block in the normal manner with the +exception of those in automatic (stack) storage. Thus one may access +functions and global variables as one would expect, as well as static +local variables. [testme] + +Local automatic (stack) variables referenced within the compound +statement of a Block are imported and captured by the Block as const +copies. The capture (binding) is performed at the time of the Block +literal expression evaluation. 
+ +The compiler is not required to capture a variable if it can prove +that no references to the variable will actually be evaluated. +Programmers can force a variable to be captured by referencing it in a +statement at the beginning of the Block, like so: + +.. code-block:: c + + (void) foo; + +This matters when capturing the variable has side-effects, as it can +in Objective-C or C++. + +The lifetime of variables declared in a Block is that of a function; +each activation frame contains a new copy of variables declared within +the local scope of the Block. Such variable declarations should be +allowed anywhere [testme] rather than only when C99 parsing is +requested, including for statements. [testme] + +Block literal expressions may occur within Block literal expressions +(nest) and all variables captured by any nested blocks are implicitly +also captured in the scopes of their enclosing Blocks. + +A Block literal expression may be used as the initialization value for +Block variables at global or local static scope. + +The Invoke Operator +=================== + +Blocks are :block-term:`invoked` using function call syntax with a +list of expression parameters of types corresponding to the +declaration and returning a result type also according to the +declaration. Given: + +.. code-block:: c + + int (^x)(char); + void (^z)(void); + int (^(*y))(char) = &x; + +the following are all legal Block invocations: + +.. code-block:: c + + x('a'); + (*y)('a'); + (true ? x : *y)('a') + +The Copy and Release Operations +=============================== + +The compiler and runtime provide :block-term:`copy` and +:block-term:`release` operations for Block references that create and, +in matched use, release allocated storage for referenced Blocks. + +The copy operation ``Block_copy()`` is styled as a function that takes +an arbitrary Block reference and returns a Block reference of the same +type. 
The release operation, ``Block_release()``, is styled as a +function that takes an arbitrary Block reference and, if dynamically +matched to a Block copy operation, allows recovery of the referenced +allocated memory. + + +The ``__block`` Storage Qualifier +================================= + +In addition to the new Block type we also introduce a new storage +qualifier, :block-term:`__block`, for local variables. [testme: a +__block declaration within a block literal] The ``__block`` storage +qualifier is mutually exclusive to the existing local storage +qualifiers auto, register, and static. [testme] Variables qualified by +``__block`` act as if they were in allocated storage and this storage +is automatically recovered after last use of said variable. An +implementation may choose an optimization where the storage is +initially automatic and only "moved" to allocated (heap) storage upon +a Block_copy of a referencing Block. Such variables may be mutated as +normal variables are. + +In the case where a ``__block`` variable is a Block one must assume +that the ``__block`` variable resides in allocated storage and as such +is assumed to reference a Block that is also in allocated storage +(that it is the result of a ``Block_copy`` operation). Despite this +there is no provision to do a ``Block_copy`` or a ``Block_release`` if +an implementation provides initial automatic storage for Blocks. This +is due to the inherent race condition of potentially several threads +trying to update the shared variable and the need for synchronization +around disposing of older values and copying new ones. Such +synchronization is beyond the scope of this language specification. + + +Control Flow +============ + +The compound statement of a Block is treated much like a function body +with respect to control flow in that goto, break, and continue do not +escape the Block. Exceptions are treated *normally* in that when +thrown they pop stack frames until a catch clause is found. 
+ + +Objective-C Extensions +====================== + +Objective-C extends the definition of a Block reference type to be +that also of id. A variable or expression of Block type may be +messaged or used as a parameter wherever an id may be. The converse is +also true. Block references may thus appear as properties and are +subject to the assign, retain, and copy attribute logic that is +reserved for objects. + +All Blocks are constructed to be Objective-C objects regardless of +whether the Objective-C runtime is operational in the program or +not. Blocks using automatic (stack) memory are objects and may be +messaged, although they may not be assigned into ``__weak`` locations +if garbage collection is enabled. + +Within a Block literal expression within a method definition +references to instance variables are also imported into the lexical +scope of the compound statement. These variables are implicitly +qualified as references from self, and so self is imported as a const +copy. The net effect is that instance variables can be mutated. + +The :block-term:`Block_copy` operator retains all objects held in +variables of automatic storage referenced within the Block expression +(or form strong references if running under garbage collection). +Object variables of ``__block`` storage type are assumed to hold +normal pointers with no provision for retain and release messages. + +Foundation defines (and supplies) ``-copy`` and ``-release`` methods for +Blocks. + +In the Objective-C and Objective-C++ languages, we allow the +``__weak`` specifier for ``__block`` variables of object type. If +garbage collection is not enabled, this qualifier causes these +variables to be kept without retain messages being sent. This +knowingly leads to dangling pointers if the Block (or a copy) outlives +the lifetime of this object. 
+ +In garbage collected environments, the ``__weak`` variable is set to +nil when the object it references is collected, as long as the +``__block`` variable resides in the heap (either by default or via +``Block_copy()``). The initial Apple implementation does in fact +start ``__block`` variables on the stack and migrate them to the heap +only as a result of a ``Block_copy()`` operation. + +It is a runtime error to attempt to assign a reference to a +stack-based Block into any storage marked ``__weak``, including +``__weak`` ``__block`` variables. + + +C++ Extensions +============== + +Block literal expressions within functions are extended to allow const +use of C++ objects, pointers, or references held in automatic storage. + +As usual, within the block, references to captured variables become +const-qualified, as if they were references to members of a const +object. Note that this does not change the type of a variable of +reference type. + +For example, given a class Foo: + +.. code-block:: c + + Foo foo; + Foo &fooRef = foo; + Foo *fooPtr = &foo; + +A Block that referenced these variables would import the variables as +const variations: + +.. code-block:: c + + const Foo block_foo = foo; + Foo &block_fooRef = fooRef; + Foo *const block_fooPtr = fooPtr; + +Captured variables are copied into the Block at the instant of +evaluating the Block literal expression. They are also copied when +calling ``Block_copy()`` on a Block allocated on the stack. In both +cases, they are copied as if the variable were const-qualified, and +it's an error if there's no such constructor. + +Captured variables in Blocks on the stack are destroyed when control +leaves the compound statement that contains the Block literal +expression. Captured variables in Blocks on the heap are destroyed +when the reference count of the Block drops to zero. 
+ +Variables declared as residing in ``__block`` storage may be initially +allocated in the heap or may first appear on the stack and be copied +to the heap as a result of a ``Block_copy()`` operation. When copied +from the stack, ``__block`` variables are copied using their normal +qualification (i.e. without adding const). In C++11, ``__block`` +variables are copied as x-values if that is possible, then as l-values +if not; if both fail, it's an error. The destructor for any initial +stack-based version is called at the variable's normal end of scope. + +References to ``this``, as well as references to non-static members of +any enclosing class, are evaluated by capturing ``this`` just like a +normal variable of C pointer type. + +Member variables that are Blocks may not be overloaded by the types of +their arguments. diff --git a/docs/BlockLanguageSpec.txt b/docs/BlockLanguageSpec.txt deleted file mode 100644 index 4cdf75a..0000000 --- a/docs/BlockLanguageSpec.txt +++ /dev/null @@ -1,171 +0,0 @@ -Language Specification for Blocks - -2008/2/25 — created -2008/7/28 — revised, __block syntax -2008/8/13 — revised, Block globals -2008/8/21 — revised, C++ elaboration -2008/11/1 — revised, __weak support -2009/1/12 — revised, explicit return types -2009/2/10 — revised, __block objects need retain - -Copyright 2008-2009 Apple, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -The Block Type - -A new derived type is introduced to C and, by extension, Objective-C, C++, and Objective-C++. Like function types, the Block type is a pair consisting of a result value type and a list of parameter types very similar to a function type. Blocks are intended to be used much like functions with the key distinction being that in addition to executable code they also contain various variable bindings to automatic (stack) or managed (heap) memory. - -The abstract declarator int (^)(char, float) describes a reference to a Block that, when invoked, takes two parameters, the first of type char and the second of type float, and returns a value of type int. The Block referenced is of opaque data that may reside in automatic (stack) memory, global memory, or heap memory. - - -Block Variable Declarations - -A variable with Block type is declared using function pointer style notation substituting ^ for *. The following are valid Block variable declarations: - void (^blockReturningVoidWithVoidArgument)(void); - int (^blockReturningIntWithIntAndCharArguments)(int, char); - void (^arrayOfTenBlocksReturningVoidWithIntArgument[10])(int); - -Variadic ... arguments are supported. [variadic.c] A Block that takes no arguments must specify void in the argument list [voidarg.c]. An empty parameter list does not represent, as K&R provide, an unspecified argument list. Note: both gcc and clang support K&R style as a convenience. - -A Block reference may be cast to a pointer of arbitrary type and vice versa. 
[cast.c] A Block reference may not be dereferenced via the pointer dereference operator *, and thus a Block's size may not be computed at compile time. [sizeof.c] - - -Block Literal Expressions - -A Block literal expression produces a reference to a Block. It is introduced by the use of the ^ token as a unary operator. - Block_literal_expression ::= ^ block_decl compound_statement_body - block_decl ::= - block_decl ::= parameter_list - block_decl ::= type_expression - -...where type expression is extended to allow ^ as a Block reference (pointer) where * is allowed as a function reference (pointer). - -The following Block literal: - ^ void (void) { printf("hello world\n"); } - -...produces a reference to a Block with no arguments with no return value. - -The return type is optional and is inferred from the return statements. If the return statements return a value, they all must return a value of the same type. If there is no value returned the inferred type of the Block is void; otherwise it is the type of the return statement value. - -If the return type is omitted and the argument list is ( void ), the ( void ) argument list may also be omitted. - -So: - ^ ( void ) { printf("hello world\n"); } - -...and: - ^ { printf("hello world\n"); } - -...are exactly equivalent constructs for the same expression. - -The type_expression extends C expression parsing to accommodate Block reference declarations as it accommodates function pointer declarations. - -Given: - typedef int (*pointerToFunctionThatReturnsIntWithCharArg)(char); - pointerToFunctionThatReturnsIntWithCharArg functionPointer; - - ^ pointerToFunctionThatReturnsIntWithCharArg (float x) { return functionPointer; } - -...and: - ^ int ((*)(float x))(char) { return functionPointer; } - -...are equivalent expressions, as is: - - ^(float x) { return functionPointer; } - -[returnfunctionptr.c] - -The compound statement body establishes a new lexical scope within that of its parent. 
Variables used within the scope of the compound statement are bound to the Block in the normal manner with the exception of those in automatic (stack) storage. Thus one may access functions and global variables as one would expect, as well as static local variables. [testme] - -Local automatic (stack) variables referenced within the compound statement of a Block are imported and captured by the Block as const copies. The capture (binding) is performed at the time of the Block literal expression evaluation. - -The compiler is not required to capture a variable if it can prove that no references to the variable will actually be evaluated. Programmers can force a variable to be captured by referencing it in a statement at the beginning of the Block, like so: - (void) foo; -This matters when capturing the variable has side-effects, as it can in Objective-C or C++. - -The lifetime of variables declared in a Block is that of a function; each activation frame contains a new copy of variables declared within the local scope of the Block. Such variable declarations should be allowed anywhere [testme] rather than only when C99 parsing is requested, including for statements. [testme] - -Block literal expressions may occur within Block literal expressions (nest) and all variables captured by any nested blocks are implicitly also captured in the scopes of their enclosing Blocks. - -A Block literal expression may be used as the initialization value for Block variables at global or local static scope. - - -The Invoke Operator - -Blocks are invoked using function call syntax with a list of expression parameters of types corresponding to the declaration and returning a result type also according to the declaration. Given: - int (^x)(char); - void (^z)(void); - int (^(*y))(char) = &x; - -...the following are all legal Block invocations: - x('a'); - (*y)('a'); - (true ? 
x : *y)('a') - - -The Copy and Release Operations - -The compiler and runtime provide copy and release operations for Block references that create and, in matched use, release allocated storage for referenced Blocks. - -The copy operation Block_copy() is styled as a function that takes an arbitrary Block reference and returns a Block reference of the same type. The release operation, Block_release(), is styled as a function that takes an arbitrary Block reference and, if dynamically matched to a Block copy operation, allows recovery of the referenced allocated memory. - - -The __block Storage Qualifier - -In addition to the new Block type we also introduce a new storage qualifier, __block, for local variables. [testme: a __block declaration within a block literal] The __block storage qualifier is mutually exclusive to the existing local storage qualifiers auto, register, and static.[testme] Variables qualified by __block act as if they were in allocated storage and this storage is automatically recovered after last use of said variable. An implementation may choose an optimization where the storage is initially automatic and only "moved" to allocated (heap) storage upon a Block_copy of a referencing Block. Such variables may be mutated as normal variables are. - -In the case where a __block variable is a Block one must assume that the __block variable resides in allocated storage and as such is assumed to reference a Block that is also in allocated storage (that it is the result of a Block_copy operation). Despite this there is no provision to do a Block_copy or a Block_release if an implementation provides initial automatic storage for Blocks. This is due to the inherent race condition of potentially several threads trying to update the shared variable and the need for synchronization around disposing of older values and copying new ones. Such synchronization is beyond the scope of this language specification. 
- - -Control Flow - -The compound statement of a Block is treated much like a function body with respect to control flow in that goto, break, and continue do not escape the Block. Exceptions are treated "normally" in that when thrown they pop stack frames until a catch clause is found. - - -Objective-C Extensions - -Objective-C extends the definition of a Block reference type to be that also of id. A variable or expression of Block type may be messaged or used as a parameter wherever an id may be. The converse is also true. Block references may thus appear as properties and are subject to the assign, retain, and copy attribute logic that is reserved for objects. - -All Blocks are constructed to be Objective-C objects regardless of whether the Objective-C runtime is operational in the program or not. Blocks using automatic (stack) memory are objects and may be messaged, although they may not be assigned into __weak locations if garbage collection is enabled. - -Within a Block literal expression within a method definition references to instance variables are also imported into the lexical scope of the compound statement. These variables are implicitly qualified as references from self, and so self is imported as a const copy. The net effect is that instance variables can be mutated. - -The Block_copy operator retains all objects held in variables of automatic storage referenced within the Block expression (or form strong references if running under garbage collection). Object variables of __block storage type are assumed to hold normal pointers with no provision for retain and release messages. - -Foundation defines (and supplies) -copy and -release methods for Blocks. - -In the Objective-C and Objective-C++ languages, we allow the __weak specifier for __block variables of object type. If garbage collection is not enabled, this qualifier causes these variables to be kept without retain messages being sent. 
This knowingly leads to dangling pointers if the Block (or a copy) outlives the lifetime of this object. - -In garbage collected environments, the __weak variable is set to nil when the object it references is collected, as long as the __block variable resides in the heap (either by default or via Block_copy()). The initial Apple implementation does in fact start __block variables on the stack and migrate them to the heap only as a result of a Block_copy() operation. - -It is a runtime error to attempt to assign a reference to a stack-based Block into any storage marked __weak, including __weak __block variables. - - -C++ Extensions - -Block literal expressions within functions are extended to allow const use of C++ objects, pointers, or references held in automatic storage. - -As usual, within the block, references to captured variables become const-qualified, as if they were references to members of a const object. Note that this does not change the type of a variable of reference type. - -For example, given a class Foo: - Foo foo; - Foo &fooRef = foo; - Foo *fooPtr = &foo; - -A Block that referenced these variables would import the variables as const variations: - const Foo block_foo = foo; - Foo &block_fooRef = fooRef; - Foo *const block_fooPtr = fooPtr; - -Captured variables are copied into the Block at the instant of evaluating the Block literal expression. They are also copied when calling Block_copy() on a Block allocated on the stack. In both cases, they are copied as if the variable were const-qualified, and it's an error if there's no such constructor. - -Captured variables in Blocks on the stack are destroyed when control leaves the compound statement that contains the Block literal expression. Captured variables in Blocks on the heap are destroyed when the reference count of the Block drops to zero. 
- -Variables declared as residing in __block storage may be initially allocated in the heap or may first appear on the stack and be copied to the heap as a result of a Block_copy() operation. When copied from the stack, __block variables are copied using their normal qualification (i.e. without adding const). In C++11, __block variables are copied as x-values if that is possible, then as l-values if not; if both fail, it's an error. The destructor for any initial stack-based version is called at the variable's normal end of scope. - -References to 'this', as well as references to non-static members of any enclosing class, are evaluated by capturing 'this' just like a normal variable of C pointer type. - -Member variables that are Blocks may not be overloaded by the types of their arguments. - diff --git a/docs/ClangCheck.rst b/docs/ClangCheck.rst new file mode 100644 index 0000000..4650049 --- /dev/null +++ b/docs/ClangCheck.rst @@ -0,0 +1,36 @@ +========== +ClangCheck +========== + +`ClangCheck` is a small wrapper around :doc:`LibTooling` which can be used to +do basic error checking and AST dumping. + +.. code-block:: console + + $ cat <<EOF > snippet.cc + > void f() { + > int a = 0 + > } + > EOF + $ ~/clang/build/bin/clang-check snippet.cc -ast-dump -- + Processing: /Users/danieljasper/clang/llvm/tools/clang/docs/snippet.cc. 
+ /Users/danieljasper/clang/llvm/tools/clang/docs/snippet.cc:2:12: error: expected ';' at end of + declaration + int a = 0 + ^ + ; + (TranslationUnitDecl 0x7ff3a3029ed0 <<invalid sloc>> + (TypedefDecl 0x7ff3a302a410 <<invalid sloc>> __int128_t '__int128') + (TypedefDecl 0x7ff3a302a470 <<invalid sloc>> __uint128_t 'unsigned __int128') + (TypedefDecl 0x7ff3a302a830 <<invalid sloc>> __builtin_va_list '__va_list_tag [1]') + (FunctionDecl 0x7ff3a302a8d0 </Users/danieljasper/clang/llvm/tools/clang/docs/snippet.cc:1:1, line:3:1> f 'void (void)' + (CompoundStmt 0x7ff3a302aa10 <line:1:10, line:3:1> + (DeclStmt 0x7ff3a302a9f8 <line:2:3, line:3:1> + (VarDecl 0x7ff3a302a980 <line:2:3, col:11> a 'int' + (IntegerLiteral 0x7ff3a302a9d8 <col:11> 'int' 0)))))) + 1 error generated. + Error while processing snippet.cc. + +The '--' at the end is important as it prevents `clang-check` from searching for a +compilation database. For more information on how to set up and use `clang-check` +in a project, see :doc:`HowToSetupToolingForLLVM`. diff --git a/docs/ClangFormat.rst b/docs/ClangFormat.rst new file mode 100644 index 0000000..92d7fc3 --- /dev/null +++ b/docs/ClangFormat.rst @@ -0,0 +1,93 @@ +=========== +ClangFormat +=========== + +`ClangFormat` describes a set of tools that are built on top of +:doc:`LibFormat`. It can support your workflow in a variety of ways including a +standalone tool and editor integrations. + + +Standalone Tool +=============== + +:program:`clang-format` is located in `clang/tools/clang-format` and can be used +to format C/C++/Obj-C code. + +.. code-block:: console + + $ clang-format --help + OVERVIEW: A tool to format C/C++/Obj-C code. + + Currently supports LLVM and Google style guides. + If no arguments are specified, it formats the code from standard input + and writes the result to the standard output. + If <file> is given, it reformats the file. If -i is specified together + with <file>, the file is edited in-place.
Otherwise, the result is + written to the standard output. + + USAGE: clang-format [options] [<file>] + + OPTIONS: + -fatal-assembler-warnings - Consider warnings as error + -help - Display available options (-help-hidden for more) + -i - Inplace edit <file>, if specified. + -length=<int> - Format a range of this length, -1 for end of file. + -offset=<int> - Format a range starting at this file offset. + -stats - Enable statistics output from program + -style=<string> - Coding style, currently supports: LLVM, Google, Chromium. + -version - Display the version of this program + + +Vim Integration +=============== + +There is an integration for :program:`vim` which lets you run the +:program:`clang-format` standalone tool on your current buffer, optionally +selecting regions to reformat. The integration has the form of a `python`-file +which can be found under `clang/tools/clang-format/clang-format.py`. + +This can be integrated by adding the following to your `.vimrc`: + +.. code-block:: vim + + map <C-K> :pyf <path-to-this-file>/clang-format.py<CR> + imap <C-K> <ESC>:pyf <path-to-this-file>/clang-format.py<CR>i + +The first line enables :program:`clang-format` for NORMAL and VISUAL mode, the +second line adds support for INSERT mode. Change "C-K" to another binding if +you need :program:`clang-format` on a different key (C-K stands for Ctrl+k). + +With this integration you can press the bound key and clang-format will +format the current line in NORMAL and INSERT mode or the selected region in +VISUAL mode. The line or region is extended to the next bigger syntactic +entity. + +It operates on the current, potentially unsaved buffer and does not create +or save any files. To revert a formatting, just undo. + + +Script for patch reformatting +============================= + +The python script `clang/tools/clang-format-diff.py` parses the output of +a unified diff and reformats all contained lines with :program:`clang-format`. + +.. 
code-block:: console + + usage: clang-format-diff.py [-h] [-p P] [-style STYLE] + + Reformat changed lines in diff + + optional arguments: + -h, --help show this help message and exit + -p P strip the smallest prefix containing P slashes + -style STYLE formatting style to apply (LLVM, Google) + +So to reformat all the lines in the latest :program:`git` commit, just do: + +.. code-block:: console + + git diff -U0 HEAD^ | clang-format-diff.py + +The :option:`-U0` will create a diff without context lines (otherwise the script would format +those as well). diff --git a/docs/ClangPlugins.html b/docs/ClangPlugins.html deleted file mode 100644 index ed560fe..0000000 --- a/docs/ClangPlugins.html +++ /dev/null @@ -1,170 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> -<title>Clang Plugins</title> -<link type="text/css" rel="stylesheet" href="../menu.css"> -<link type="text/css" rel="stylesheet" href="../content.css"> -</head> -<body> - -<!--#include virtual="../menu.html.incl"--> - -<div id="content"> - -<h1>Clang Plugins</h1> -<p>Clang Plugins make it possible to run extra user defined actions during -a compilation. This document will provide a basic walkthrough of how to write -and run a Clang Plugin.</p> - -<!-- ======================================================================= --> -<h2 id="intro">Introduction</h2> -<!-- ======================================================================= --> - -<p>Clang Plugins run FrontendActions over code. See the -<a href="RAVFrontendAction.html">FrontendAction tutorial</a> on how to write a -FrontendAction using the RecursiveASTVisitor. In this tutorial, we'll -demonstrate how to write a simple clang plugin. 
-</p> - -<!-- ======================================================================= --> -<h2 id="pluginactions">Writing a PluginASTAction</h2> -<!-- ======================================================================= --> - -<p>The main difference from writing normal FrontendActions is that you can -handle plugin command line options. The -PluginASTAction base class declares a ParseArgs method which you have to -implement in your plugin. -</p> -<pre> - bool ParseArgs(const CompilerInstance &CI, - const std::vector<std::string>& args) { - for (unsigned i = 0, e = args.size(); i != e; ++i) { - if (args[i] == "-some-arg") { - // Handle the command line argument. - } - } - return true; - } -</pre> - -<!-- ======================================================================= --> -<h2 id="registerplugin">Registering a plugin</h2> -<!-- ======================================================================= --> - -<p>A plugin is loaded from a dynamic library at runtime by the compiler. To register -a plugin in a library, use FrontendPluginRegistry::Add:</p> -<pre> - static FrontendPluginRegistry::Add<MyPlugin> X("my-plugin-name", "my plugin description"); -</pre> - -<!-- ======================================================================= --> -<h2 id="example">Putting it all together</h2> -<!-- ======================================================================= --> - -<p>Let's look at an example plugin that prints top-level function names. 
-This example is also checked into the clang repository; please also take a look -at the latest <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/examples/PrintFunctionNames/PrintFunctionNames.cpp?view=markup">checked in version of PrintFunctionNames.cpp</a>.</p> -<pre> -#include "clang/Frontend/FrontendPluginRegistry.h" -#include "clang/AST/ASTConsumer.h" -#include "clang/AST/AST.h" -#include "clang/Frontend/CompilerInstance.h" -#include "llvm/Support/raw_ostream.h" -using namespace clang; - -namespace { - -class PrintFunctionsConsumer : public ASTConsumer { -public: - virtual bool HandleTopLevelDecl(DeclGroupRef DG) { - for (DeclGroupRef::iterator i = DG.begin(), e = DG.end(); i != e; ++i) { - const Decl *D = *i; - if (const NamedDecl *ND = dyn_cast<NamedDecl>(D)) - llvm::errs() << "top-level-decl: \"" << ND->getNameAsString() << "\"\n"; - } - - return true; - } -}; - -class PrintFunctionNamesAction : public PluginASTAction { -protected: - ASTConsumer *CreateASTConsumer(CompilerInstance &CI, llvm::StringRef) { - return new PrintFunctionsConsumer(); - } - - bool ParseArgs(const CompilerInstance &CI, - const std::vector<std::string>& args) { - for (unsigned i = 0, e = args.size(); i != e; ++i) { - llvm::errs() << "PrintFunctionNames arg = " << args[i] << "\n"; - - // Example error handling. 
- if (args[i] == "-an-error") { - DiagnosticsEngine &D = CI.getDiagnostics(); - unsigned DiagID = D.getCustomDiagID( - DiagnosticsEngine::Error, "invalid argument '" + args[i] + "'"); - D.Report(DiagID); - return false; - } - } - if (args.size() && args[0] == "help") - PrintHelp(llvm::errs()); - - return true; - } - void PrintHelp(llvm::raw_ostream& ros) { - ros << "Help for PrintFunctionNames plugin goes here\n"; - } - -}; - -} - -static FrontendPluginRegistry::Add<PrintFunctionNamesAction> -X("print-fns", "print function names"); -</pre> - -<!-- ======================================================================= --> -<h2 id="running">Running the plugin</h2> -<!-- ======================================================================= --> - -<p>To run a plugin, the dynamic library containing the plugin registry must be -loaded via the -load command line option. This will load all plugins that are -registered, and you can select the plugins to run by specifying the -plugin -option. Additional parameters for the plugins can be passed with -plugin-arg-<plugin-name>.</p> - -<p>Note that those options must reach clang's cc1 process. There are two -ways to do so:</p> -<ul> -<li> -Directly call the parsing process by using the -cc1 option; this has the -downside of not configuring the default header search paths, so you'll need to -specify the full system path configuration on the command line. -</li> -<li> -Use clang as usual, but prefix all arguments to the cc1 process with -Xclang. 
-</li> -</ul> -<p>For example, to run the print-function-names plugin over a source file in clang, -first build the plugin, and then call clang with the plugin from the source tree:</p> -<pre> - $ export BD=/path/to/build/directory - $ (cd $BD && make PrintFunctionNames ) - $ clang++ -D_GNU_SOURCE -D_DEBUG -D__STDC_CONSTANT_MACROS \ - -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -D_GNU_SOURCE \ - -I$BD/tools/clang/include -Itools/clang/include -I$BD/include -Iinclude \ - tools/clang/tools/clang-check/ClangCheck.cpp -fsyntax-only \ - -Xclang -load -Xclang $BD/lib/PrintFunctionNames.so -Xclang \ - -plugin -Xclang print-fns -</pre> - -<p>Also see the print-function-name plugin example's -<a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/examples/PrintFunctionNames/README.txt?view=markup">README</a></p> - - - -</div> -</body> -</html> - diff --git a/docs/ClangPlugins.rst b/docs/ClangPlugins.rst new file mode 100644 index 0000000..7c5c65c --- /dev/null +++ b/docs/ClangPlugins.rst @@ -0,0 +1,150 @@ +============= +Clang Plugins +============= + +Clang Plugins make it possible to run extra user defined actions during a +compilation. This document will provide a basic walkthrough of how to write and +run a Clang Plugin. + +Introduction +============ + +Clang Plugins run FrontendActions over code. See the :doc:`FrontendAction +tutorial <RAVFrontendAction>` on how to write a ``FrontendAction`` using the +``RecursiveASTVisitor``. In this tutorial, we'll demonstrate how to write a +simple clang plugin. + +Writing a ``PluginASTAction`` +============================= + +The main difference from writing normal ``FrontendActions`` is that you can +handle plugin command line options. The ``PluginASTAction`` base class declares +a ``ParseArgs`` method which you have to implement in your plugin. + +.. 
code-block:: c++ + + bool ParseArgs(const CompilerInstance &CI, + const std::vector<std::string>& args) { + for (unsigned i = 0, e = args.size(); i != e; ++i) { + if (args[i] == "-some-arg") { + // Handle the command line argument. + } + } + return true; + } + +Registering a plugin +==================== + +A plugin is loaded from a dynamic library at runtime by the compiler. To +register a plugin in a library, use ``FrontendPluginRegistry::Add<>``: + +.. code-block:: c++ + + static FrontendPluginRegistry::Add<MyPlugin> X("my-plugin-name", "my plugin description"); + +Putting it all together +======================= + +Let's look at an example plugin that prints top-level function names. This +example is also checked into the clang repository; please also take a look at +the latest `checked in version of PrintFunctionNames.cpp +<http://llvm.org/viewvc/llvm-project/cfe/trunk/examples/PrintFunctionNames/PrintFunctionNames.cpp?view=markup>`_. + +.. code-block:: c++ + + #include "clang/Frontend/FrontendPluginRegistry.h" + #include "clang/AST/ASTConsumer.h" + #include "clang/AST/AST.h" + #include "clang/Frontend/CompilerInstance.h" + #include "llvm/Support/raw_ostream.h" + using namespace clang; + + namespace { + + class PrintFunctionsConsumer : public ASTConsumer { + public: + virtual bool HandleTopLevelDecl(DeclGroupRef DG) { + for (DeclGroupRef::iterator i = DG.begin(), e = DG.end(); i != e; ++i) { + const Decl *D = *i; + if (const NamedDecl *ND = dyn_cast<NamedDecl>(D)) + llvm::errs() << "top-level-decl: \"" << ND->getNameAsString() << "\"\n"; + } + + return true; + } + }; + + class PrintFunctionNamesAction : public PluginASTAction { + protected: + ASTConsumer *CreateASTConsumer(CompilerInstance &CI, llvm::StringRef) { + return new PrintFunctionsConsumer(); + } + + bool ParseArgs(const CompilerInstance &CI, + const std::vector<std::string>& args) { + for (unsigned i = 0, e = args.size(); i != e; ++i) { + llvm::errs() << "PrintFunctionNames arg = " << args[i] << "\n"; 
+ + // Example error handling. + if (args[i] == "-an-error") { + DiagnosticsEngine &D = CI.getDiagnostics(); + unsigned DiagID = D.getCustomDiagID( + DiagnosticsEngine::Error, "invalid argument '" + args[i] + "'"); + D.Report(DiagID); + return false; + } + } + if (args.size() && args[0] == "help") + PrintHelp(llvm::errs()); + + return true; + } + void PrintHelp(llvm::raw_ostream& ros) { + ros << "Help for PrintFunctionNames plugin goes here\n"; + } + + }; + + } + + static FrontendPluginRegistry::Add<PrintFunctionNamesAction> + X("print-fns", "print function names"); + +Running the plugin +================== + +To run a plugin, the dynamic library containing the plugin registry must be +loaded via the :option:`-load` command line option. This will load all plugins +that are registered, and you can select the plugins to run by specifying the +:option:`-plugin` option. Additional parameters for the plugins can be passed with +:option:`-plugin-arg-<plugin-name>`. + +Note that those options must reach clang's cc1 process. There are two +ways to do so: + +* Directly call the parsing process by using the :option:`-cc1` option; this + has the downside of not configuring the default header search paths, so + you'll need to specify the full system path configuration on the command + line. +* Use clang as usual, but prefix all arguments to the cc1 process with + :option:`-Xclang`. + +For example, to run the ``print-function-names`` plugin over a source file in +clang, first build the plugin, and then call clang with the plugin from the +source tree: + +.. 
code-block:: console + + $ export BD=/path/to/build/directory + $ (cd $BD && make PrintFunctionNames ) + $ clang++ -D_GNU_SOURCE -D_DEBUG -D__STDC_CONSTANT_MACROS \ + -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -D_GNU_SOURCE \ + -I$BD/tools/clang/include -Itools/clang/include -I$BD/include -Iinclude \ + tools/clang/tools/clang-check/ClangCheck.cpp -fsyntax-only \ + -Xclang -load -Xclang $BD/lib/PrintFunctionNames.so -Xclang \ + -plugin -Xclang print-fns + +Also see the print-function-name plugin example's +`README <http://llvm.org/viewvc/llvm-project/cfe/trunk/examples/PrintFunctionNames/README.txt?view=markup>`_ + diff --git a/docs/ClangTools.html b/docs/ClangTools.html deleted file mode 100644 index 4de57bd..0000000 --- a/docs/ClangTools.html +++ /dev/null @@ -1,110 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> -<title>Clang Tools</title> -<link type="text/css" rel="stylesheet" href="../menu.css"> -<link type="text/css" rel="stylesheet" href="../content.css"> -</head> -<body> - -<!--#include virtual="../menu.html.incl"--> - -<div id="content"> - -<h1>Clang Tools</h1> -<p>Clang Tools are standalone command line (and potentially GUI) tools design -for use by C++ developers who are already using and enjoying Clang as their -compiler. These tools provide developer-oriented functionality such as fast -syntax checking, automatic formatting, refactoring, etc.</p> - -<p>Only a couple of the most basic and fundamental tools are kept in the primary -Clang Subversion project. The rest of the tools are kept in a side-project so -that developers who don't want or need to build them don't. 
If you want to get -access to the extra Clang Tools repository, simply check it out into the tools -tree of your Clang checkout and follow the usual process for building and -working with a combined LLVM/Clang checkout:</p> -<ul> - <li>With Subversion: - <ul> - <li><tt>cd llvm/tools/clang/tools</tt></li> - <li><tt>svn co http://llvm.org/svn/llvm-project/clang-tools-extra/trunk - extra</tt></li> - </ul> - </li> - <li>Or with Git: - <ul> - <li><tt>cd llvm/tools/clang/tools</tt></li> - <li><tt>git clone http://llvm.org/git/clang-tools-extra.git extra</tt></li> - </ul> - </li> -</ul> - -<p>This document describes a high-level overview of the organization of Clang -Tools within the project as well as giving an introduction to some of the more -important tools. However, it should be noted that this document is currently -focused on Clang and Clang Tool developers, not on end users of these tools.</p> - -<!-- ======================================================================= --> -<h2 id="org">Clang Tools Organization</h2> -<!-- ======================================================================= --> - -<p>Clang Tools are CLI or GUI programs that are intended to be directly used by -C++ developers. That is they are <em>not</em> primarily for use by Clang -developers, although they are hopefully useful to C++ developers who happen to -work on Clang, and we try to actively dogfood their functionality. They are -developed in three components: the underlying infrastructure for building -a standalone tool based on Clang, core shared logic used by many different tools -in the form of refactoring and rewriting libraries, and the tools -themselves.</p> - -<p>The underlying infrastructure for Clang Tools is the -<a href="LibTooling.html">LibTooling</a> platform. See its documentation for -much more detailed information about how this infrastructure works. 
The common -refactoring and rewriting toolkit-style library is also part of LibTooling -organizationally.</p> - -<p>A few Clang Tools are developed along side the core Clang libraries as -examples and test cases of fundamental functionality. However, most of the tools -are developed in a side repository to provide easy separation from the core -libraries. We intentionally do not support public libraries in the side -repository, as we want to carefully review and find good APIs for libraries as -they are lifted out of a few tools and into the core Clang library set.</p> - -<p>Regardless of which repository Clang Tools' code resides in, the development -process and practices for all Clang Tools are exactly those of Clang itself. -They are entirely within the Clang <em>project</em>, regardless of the version -control scheme.</p> - - -<!-- ======================================================================= --> -<h2 id="coretools">Core Clang Tools</h2> -<!-- ======================================================================= --> - -<p>The core set of Clang tools that are within the main repository are tools -that very specifically compliment, and allow use and testing of <em>Clang</em> -specific functionality.</p> - -<h3 id="clang-check"><tt>clang-check</tt></h3> -<p>This tool combines the LibTooling framework for running a Clang tool with the -basic Clang diagnostics by syntax checking specific files in a fast, command -line interface. It can also accept flags to re-display the diagnostics in -different formats with different flags, suitable for use driving an IDE or -editor. 
Furthermore, it can be used in fixit-mode to directly apply fixit-hints -offered by clang.</p> - -<p>FIXME: Link to user-oriented clang-check documentation.</p> - -<!-- ======================================================================= --> -<h2 id="registerplugin">Extra Clang Tools</h2> -<!-- ======================================================================= --> - -<p>As various categories of Clang Tools are added to the extra repository, -they'll be tracked here. The focus of this documentation is on the scope and -features of the tools for other tool developers; each tool should provide its -own user-focused documentation.</p> - -</div> -</body> -</html> - diff --git a/docs/ClangTools.rst b/docs/ClangTools.rst new file mode 100644 index 0000000..b7f7c7b --- /dev/null +++ b/docs/ClangTools.rst @@ -0,0 +1,152 @@ +======== +Overview +======== + +Clang Tools are standalone command line (and potentially GUI) tools +designed for use by C++ developers who are already using and enjoying +Clang as their compiler. These tools provide developer-oriented +functionality such as fast syntax checking, automatic formatting, +refactoring, etc. + +Only a couple of the most basic and fundamental tools are kept in the +primary Clang Subversion project. The rest of the tools are kept in a +side-project so that developers who don't want or need to build them +don't. 
If you want to get access to the extra Clang Tools repository, +simply check it out into the tools tree of your Clang checkout and +follow the usual process for building and working with a combined +LLVM/Clang checkout: + +- With Subversion: + + - ``cd llvm/tools/clang/tools`` + - ``svn co http://llvm.org/svn/llvm-project/clang-tools-extra/trunk extra`` + +- Or with Git: + + - ``cd llvm/tools/clang/tools`` + - ``git clone http://llvm.org/git/clang-tools-extra.git extra`` + +This document describes a high-level overview of the organization of +Clang Tools within the project as well as giving an introduction to some +of the more important tools. However, it should be noted that this +document is currently focused on Clang and Clang Tool developers, not on +end users of these tools. + +Clang Tools Organization +======================== + +Clang Tools are CLI or GUI programs that are intended to be directly +used by C++ developers. That is, they are *not* primarily for use by +Clang developers, although they are hopefully useful to C++ developers +who happen to work on Clang, and we try to actively dogfood their +functionality. They are developed in three components: the underlying +infrastructure for building a standalone tool based on Clang, core +shared logic used by many different tools in the form of refactoring and +rewriting libraries, and the tools themselves. + +The underlying infrastructure for Clang Tools is the +:doc:`LibTooling <LibTooling>` platform. See its documentation for much +more detailed information about how this infrastructure works. The +common refactoring and rewriting toolkit-style library is also part of +LibTooling organizationally. + +A few Clang Tools are developed alongside the core Clang libraries as +examples and test cases of fundamental functionality. However, most of +the tools are developed in a side repository to provide easy separation +from the core libraries.
We intentionally do not support public +libraries in the side repository, as we want to carefully review and +find good APIs for libraries as they are lifted out of a few tools and +into the core Clang library set. + +Regardless of which repository Clang Tools' code resides in, the +development process and practices for all Clang Tools are exactly those +of Clang itself. They are entirely within the Clang *project*, +regardless of the version control scheme. + +Core Clang Tools +================ + +The core set of Clang tools that are within the main repository are +tools that very specifically complement, and allow use and testing of +*Clang* specific functionality. + +``clang-check`` +--------------- + +:doc:`ClangCheck` combines the LibTooling framework for running a +Clang tool with the basic Clang diagnostics by syntax checking specific files +in a fast, command line interface. It can also accept flags to re-display the +diagnostics in different formats with different flags, suitable for use driving +an IDE or editor. Furthermore, it can be used in fixit-mode to directly apply +fixit-hints offered by clang. See :doc:`HowToSetupToolingForLLVM` for +instructions on how to set up and use ``clang-check``. + +``clang-format`` +~~~~~~~~~~~~~~~~ + +Clang-format is both a :doc:`library <LibFormat>` and a :doc:`stand-alone tool +<ClangFormat>` with the goal of automatically reformatting C++ source files +according to configurable style guides. To do so, clang-format uses Clang's +``Lexer`` to transform an input file into a token stream and then changes all +the whitespace around those tokens. The goal is for clang-format to serve +both as a user tool (ideally with powerful IDE integrations) and part of other +refactoring tools, e.g. to do a reformatting of all the lines changed during a +renaming. + +``cpp11-migrate`` +~~~~~~~~~~~~~~~~~ +``cpp11-migrate`` migrates C++ code to use C++11 features where appropriate.
+Currently it can: + +* convert loops to range-based for loops; + +* convert null pointer constants (like ``NULL`` or ``0``) to C++11 ``nullptr``. + +Extra Clang Tools +================= + +As various categories of Clang Tools are added to the extra repository, +they'll be tracked here. The focus of this documentation is on the scope +and features of the tools for other tool developers; each tool should +provide its own user-focused documentation. + +Ideas for new Tools +=================== + +* C++ cast conversion tool. Will convert C-style casts (``(type) value``) to + appropriate C++ cast (``static_cast``, ``const_cast`` or + ``reinterpret_cast``). +* Non-member ``begin()`` and ``end()`` conversion tool. Will convert + ``foo.begin()`` into ``begin(foo)`` and similarly for ``end()``, where + ``foo`` is a standard container. We could also detect similar patterns for + arrays. +* ``tr1`` removal tool. Will migrate source code from using TR1 library + features to C++11 library. For example: + + .. code-block:: c++ + + #include <tr1/unordered_map> + int main() + { + std::tr1::unordered_map <int, int> ma; + std::cout << ma.size () << std::endl; + return 0; + } + + should be rewritten to: + + .. code-block:: c++ + + #include <unordered_map> + int main() + { + std::unordered_map <int, int> ma; + std::cout << ma.size () << std::endl; + return 0; + } + +* A tool to remove ``auto``. Will convert ``auto`` to an explicit type or add + comments with deduced types. The motivation is that there are developers + that don't want to use ``auto`` because they are afraid that they might lose + control over their code. 
+ diff --git a/docs/DriverInternals.html b/docs/DriverInternals.html deleted file mode 100644 index ce707b9..0000000 --- a/docs/DriverInternals.html +++ /dev/null @@ -1,523 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> - <head> - <title>Clang Driver Manual</title> - <link type="text/css" rel="stylesheet" href="../menu.css"> - <link type="text/css" rel="stylesheet" href="../content.css"> - <style type="text/css"> - td { - vertical-align: top; - } - </style> - </head> - <body> - - <!--#include virtual="../menu.html.incl"--> - - <div id="content"> - - <h1>Driver Design & Internals</h1> - - <ul> - <li><a href="#intro">Introduction</a></li> - <li><a href="#features">Features and Goals</a> - <ul> - <li><a href="#gcccompat">GCC Compatibility</a></li> - <li><a href="#components">Flexible</a></li> - <li><a href="#performance">Low Overhead</a></li> - <li><a href="#simple">Simple</a></li> - </ul> - </li> - <li><a href="#design">Design</a> - <ul> - <li><a href="#int_intro">Internals Introduction</a></li> - <li><a href="#int_overview">Design Overview</a></li> - <li><a href="#int_notes">Additional Notes</a> - <ul> - <li><a href="#int_compilation">The Compilation Object</a></li> - <li><a href="#int_unified_parsing">Unified Parsing & Pipelining</a></li> - <li><a href="#int_toolchain_translation">ToolChain Argument Translation</a></li> - <li><a href="#int_unused_warnings">Unused Argument Warnings</a></li> - </ul> - </li> - <li><a href="#int_gcc_concepts">Relation to GCC Driver Concepts</a></li> - </ul> - </li> - </ul> - - - <!-- ======================================================================= --> - <h2 id="intro">Introduction</h2> - <!-- ======================================================================= --> - - <p>This document describes the Clang driver. 
The purpose of this - document is to describe both the motivation and design goals - for the driver, as well as details of the internal - implementation.</p> - - <!-- ======================================================================= --> - <h2 id="features">Features and Goals</h2> - <!-- ======================================================================= --> - - <p>The Clang driver is intended to be a production quality - compiler driver providing access to the Clang compiler and - tools, with a command line interface which is compatible with - the gcc driver.</p> - - <p>Although the driver is part of and driven by the Clang - project, it is logically a separate tool which shares many of - the same goals as Clang:</p> - - <p><b>Features</b>:</p> - <ul> - <li><a href="#gcccompat">GCC Compatibility</a></li> - <li><a href="#components">Flexible</a></li> - <li><a href="#performance">Low Overhead</a></li> - <li><a href="#simple">Simple</a></li> - </ul> - - <!--=======================================================================--> - <h3 id="gcccompat">GCC Compatibility</h3> - <!--=======================================================================--> - - <p>The number one goal of the driver is to ease the adoption of - Clang by allowing users to drop Clang into a build system - which was designed to call GCC. Although this makes the driver - much more complicated than might otherwise be necessary, we - decided that being very compatible with the gcc command line - interface was worth it in order to allow users to quickly test - clang on their projects.</p> - - <!--=======================================================================--> - <h3 id="components">Flexible</h3> - <!--=======================================================================--> - - <p>The driver was designed to be flexible and easily accommodate - new uses as we grow the clang and LLVM infrastructure. 
As one - example, the driver can easily support the introduction of - tools which have an integrated assembler; something we hope to - add to LLVM in the future.</p> - - <p>Similarly, most of the driver functionality is kept in a - library which can be used to build other tools which want to - implement or accept a gcc like interface. </p> - - <!--=======================================================================--> - <h3 id="performance">Low Overhead</h3> - <!--=======================================================================--> - - <p>The driver should have as little overhead as possible. In - practice, we found that the gcc driver by itself incurred a - small but meaningful overhead when compiling many small - files. The driver doesn't do much work compared to a - compilation, but we have tried to keep it as efficient as - possible by following a few simple principles:</p> - <ul> - <li>Avoid memory allocation and string copying when - possible.</li> - - <li>Don't parse arguments more than once.</li> - - <li>Provide a few simple interfaces for efficiently searching - arguments.</li> - </ul> - - <!--=======================================================================--> - <h3 id="simple">Simple</h3> - <!--=======================================================================--> - - <p>Finally, the driver was designed to be "as simple as - possible", given the other goals. Notably, trying to be - completely compatible with the gcc driver adds a significant - amount of complexity. 
However, the design of the driver - attempts to mitigate this complexity by dividing the process - into a number of independent stages instead of a single - monolithic task.</p> - - <!-- ======================================================================= --> - <h2 id="design">Internal Design and Implementation</h2> - <!-- ======================================================================= --> - - <ul> - <li><a href="#int_intro">Internals Introduction</a></li> - <li><a href="#int_overview">Design Overview</a></li> - <li><a href="#int_notes">Additional Notes</a></li> - <li><a href="#int_gcc_concepts">Relation to GCC Driver Concepts</a></li> - </ul> - - <!--=======================================================================--> - <h3><a name="int_intro">Internals Introduction</a></h3> - <!--=======================================================================--> - - <p>In order to satisfy the stated goals, the driver was designed - to completely subsume the functionality of the gcc executable; - that is, the driver should not need to delegate to gcc to - perform subtasks. On Darwin, this implies that the Clang - driver also subsumes the gcc driver-driver, which is used to - implement support for building universal images (binaries and - object files). This also implies that the driver should be - able to call the language specific compilers (e.g. cc1) - directly, which means that it must have enough information to - forward command line arguments to child processes - correctly.</p> - - <!--=======================================================================--> - <h3><a name="int_overview">Design Overview</a></h3> - <!--=======================================================================--> - - <p>The diagram below shows the significant components of the - driver architecture and how they relate to one another. 
The - orange components represent concrete data structures built by - the driver, the green components indicate conceptually - distinct stages which manipulate these data structures, and - the blue components are important helper classes. </p> - - <div style="text-align:center"> - <a href="DriverArchitecture.png"> - <img width=400 src="DriverArchitecture.png" - alt="Driver Architecture Diagram"> - </a> - </div> - - <!--=======================================================================--> - <h3><a name="int_stages">Driver Stages</a></h3> - <!--=======================================================================--> - - <p>The driver functionality is conceptually divided into five stages:</p> - - <ol> - <li> - <b>Parse: Option Parsing</b> - - <p>The command line argument strings are decomposed into - arguments (<tt>Arg</tt> instances). The driver expects to - understand all available options, although there is some - facility for just passing certain classes of options - through (like <tt>-Wl,</tt>).</p> - - <p>Each argument corresponds to exactly one - abstract <tt>Option</tt> definition, which describes how - the option is parsed along with some additional - metadata. The Arg instances themselves are lightweight and - merely contain enough information for clients to determine - which option they correspond to and their values (if they - have additional parameters).</p> - - <p>For example, a command line like "-Ifoo -I foo" would - parse to two Arg instances (a JoinedArg and a SeparateArg - instance), but each would refer to the same Option.</p> - - <p>Options are lazily created in order to avoid populating - all Option classes when the driver is loaded. Most of the - driver code only needs to deal with options by their - unique ID (e.g., <tt>options::OPT_I</tt>),</p> - - <p>Arg instances themselves do not generally store the - values of parameters. In many cases, this would - simply result in creating unnecessary string - copies. 
Instead, Arg instances are always embedded inside - an ArgList structure, which contains the original vector - of argument strings. Each Arg itself only needs to contain - an index into this vector instead of storing its values - directly.</p> - - <p>The clang driver can dump the results of this - stage using the <tt>-ccc-print-options</tt> flag (which - must precede any actual command line arguments). For - example:</p> - <pre> - $ <b>clang -ccc-print-options -Xarch_i386 -fomit-frame-pointer -Wa,-fast -Ifoo -I foo t.c</b> - Option 0 - Name: "-Xarch_", Values: {"i386", "-fomit-frame-pointer"} - Option 1 - Name: "-Wa,", Values: {"-fast"} - Option 2 - Name: "-I", Values: {"foo"} - Option 3 - Name: "-I", Values: {"foo"} - Option 4 - Name: "<input>", Values: {"t.c"} - </pre> - - <p>After this stage is complete the command line should be - broken down into well defined option objects with their - appropriate parameters. Subsequent stages should rarely, - if ever, need to do any string processing.</p> - </li> - - <li> - <b>Pipeline: Compilation Job Construction</b> - - <p>Once the arguments are parsed, the tree of subprocess - jobs needed for the desired compilation sequence are - constructed. This involves determining the input files and - their types, what work is to be done on them (preprocess, - compile, assemble, link, etc.), and constructing a list of - Action instances for each task. The result is a list of - one or more top-level actions, each of which generally - corresponds to a single output (for example, an object or - linked executable).</p> - - <p>The majority of Actions correspond to actual tasks, - however there are two special Actions. The first is - InputAction, which simply serves to adapt an input - argument for use as an input to other Actions. 
The second - is BindArchAction, which conceptually alters the - architecture to be used for all of its input Actions.</p> - - <p>The clang driver can dump the results of this - stage using the <tt>-ccc-print-phases</tt> flag. For - example:</p> - <pre> - $ <b>clang -ccc-print-phases -x c t.c -x assembler t.s</b> - 0: input, "t.c", c - 1: preprocessor, {0}, cpp-output - 2: compiler, {1}, assembler - 3: assembler, {2}, object - 4: input, "t.s", assembler - 5: assembler, {4}, object - 6: linker, {3, 5}, image - </pre> - <p>Here the driver is constructing seven distinct actions, - four to compile the "t.c" input into an object file, two to - assemble the "t.s" input, and one to link them together.</p> - - <p>A rather different compilation pipeline is shown here; in - this example there are two top level actions to compile - the input files into two separate object files, where each - object file is built using <tt>lipo</tt> to merge results - built for two separate architectures.</p> - <pre> - $ <b>clang -ccc-print-phases -c -arch i386 -arch x86_64 t0.c t1.c</b> - 0: input, "t0.c", c - 1: preprocessor, {0}, cpp-output - 2: compiler, {1}, assembler - 3: assembler, {2}, object - 4: bind-arch, "i386", {3}, object - 5: bind-arch, "x86_64", {3}, object - 6: lipo, {4, 5}, object - 7: input, "t1.c", c - 8: preprocessor, {7}, cpp-output - 9: compiler, {8}, assembler - 10: assembler, {9}, object - 11: bind-arch, "i386", {10}, object - 12: bind-arch, "x86_64", {10}, object - 13: lipo, {11, 12}, object - </pre> - - <p>After this stage is complete the compilation process is - divided into a simple set of actions which need to be - performed to produce intermediate or final outputs (in - some cases, like <tt>-fsyntax-only</tt>, there is no - "real" final output). 
Phases are well known compilation - steps, such as "preprocess", "compile", "assemble", - "link", etc.</p> - </li> - - <li> - <b>Bind: Tool & Filename Selection</b> - - <p>This stage (in conjunction with the Translate stage) - turns the tree of Actions into a list of actual subprocess - to run. Conceptually, the driver performs a top down - matching to assign Action(s) to Tools. The ToolChain is - responsible for selecting the tool to perform a particular - action; once selected the driver interacts with the tool - to see if it can match additional actions (for example, by - having an integrated preprocessor). - - <p>Once Tools have been selected for all actions, the driver - determines how the tools should be connected (for example, - using an inprocess module, pipes, temporary files, or user - provided filenames). If an output file is required, the - driver also computes the appropriate file name (the suffix - and file location depend on the input types and options - such as <tt>-save-temps</tt>). - - <p>The driver interacts with a ToolChain to perform the Tool - bindings. Each ToolChain contains information about all - the tools needed for compilation for a particular - architecture, platform, and operating system. A single - driver invocation may query multiple ToolChains during one - compilation in order to interact with tools for separate - architectures.</p> - - <p>The results of this stage are not computed directly, but - the driver can print the results via - the <tt>-ccc-print-bindings</tt> option. 
For example:</p> - <pre> - $ <b>clang -ccc-print-bindings -arch i386 -arch ppc t0.c</b> - # "i386-apple-darwin9" - "clang", inputs: ["t0.c"], output: "/tmp/cc-Sn4RKF.s" - # "i386-apple-darwin9" - "darwin::Assemble", inputs: ["/tmp/cc-Sn4RKF.s"], output: "/tmp/cc-gvSnbS.o" - # "i386-apple-darwin9" - "darwin::Link", inputs: ["/tmp/cc-gvSnbS.o"], output: "/tmp/cc-jgHQxi.out" - # "ppc-apple-darwin9" - "gcc::Compile", inputs: ["t0.c"], output: "/tmp/cc-Q0bTox.s" - # "ppc-apple-darwin9" - "gcc::Assemble", inputs: ["/tmp/cc-Q0bTox.s"], output: "/tmp/cc-WCdicw.o" - # "ppc-apple-darwin9" - "gcc::Link", inputs: ["/tmp/cc-WCdicw.o"], output: "/tmp/cc-HHBEBh.out" - # "i386-apple-darwin9" - "darwin::Lipo", inputs: ["/tmp/cc-jgHQxi.out", "/tmp/cc-HHBEBh.out"], output: "a.out" - </pre> - - <p>This shows the tool chain, tool, inputs and outputs which - have been bound for this compilation sequence. Here clang - is being used to compile t0.c on the i386 architecture and - darwin specific versions of the tools are being used to - assemble and link the result, but generic gcc versions of - the tools are being used on PowerPC.</p> - </li> - - <li> - <b>Translate: Tool Specific Argument Translation</b> - - <p>Once a Tool has been selected to perform a particular - Action, the Tool must construct concrete Jobs which will be - executed during compilation. The main work is in translating - from the gcc style command line options to whatever options - the subprocess expects.</p> - - <p>Some tools, such as the assembler, only interact with a - handful of arguments and just determine the path of the - executable to call and pass on their input and output - arguments. 
Others, like the compiler or the linker, may - translate a large number of arguments in addition.</p> - - <p>The ArgList class provides a number of simple helper - methods to assist with translating arguments; for example, - to pass on only the last of arguments corresponding to some - option, or all arguments for an option.</p> - - <p>The result of this stage is a list of Jobs (executable - paths and argument strings) to execute.</p> - </li> - - <li> - <b>Execute</b> - <p>Finally, the compilation pipeline is executed. This is - mostly straightforward, although there is some interaction - with options - like <tt>-pipe</tt>, <tt>-pass-exit-codes</tt> - and <tt>-time</tt>.</p> - </li> - - </ol> - - <!--=======================================================================--> - <h3><a name="int_notes">Additional Notes</a></h3> - <!--=======================================================================--> - - <h4 id="int_compilation">The Compilation Object</h4> - - <p>The driver constructs a Compilation object for each set of - command line arguments. The Driver itself is intended to be - invariant during construction of a Compilation; an IDE should be - able to construct a single long lived driver instance to use - for an entire build, for example.</p> - - <p>The Compilation object holds information that is particular - to each compilation sequence. For example, the list of used - temporary files (which must be removed once compilation is - finished) and result files (which should be removed if - compilation fails).</p> - - <h4 id="int_unified_parsing">Unified Parsing & Pipelining</h4> - - <p>Parsing and pipelining both occur without reference to a - Compilation instance. 
This is by design; the driver expects that - both of these phases are platform neutral, with a few very well - defined exceptions such as whether the platform uses a driver - driver.</p> - - <h4 id="int_toolchain_translation">ToolChain Argument Translation</h4> - - <p>In order to match gcc very closely, the clang driver - currently allows tool chains to perform their own translation of - the argument list (into a new ArgList data structure). Although - this allows the clang driver to match gcc easily, it also makes - the driver operation much harder to understand (since the Tools - stop seeing some arguments the user provided, and see new ones - instead).</p> - - <p>For example, on Darwin <tt>-gfull</tt> gets translated into two - separate arguments, <tt>-g</tt> - and <tt>-fno-eliminate-unused-debug-symbols</tt>. Trying to write Tool - logic to do something with <tt>-gfull</tt> will not work, because Tool - argument translation is done after the arguments have been - translated.</p> - - <p>A long term goal is to remove this tool chain specific - translation, and instead force each tool to change its own logic - to do the right thing on the untranslated original arguments.</p> - - <h4 id="int_unused_warnings">Unused Argument Warnings</h4> - <p>The driver operates by parsing all arguments but giving Tools - the opportunity to choose which arguments to pass on. One - downside of this infrastructure is that if the user misspells - some option, or is confused about which options to use, some - command line arguments the user really cared about may go - unused. This problem is particularly important when using - clang as a compiler, since the clang compiler does not support - anywhere near all the options that gcc does, and we want to make - sure users know which ones are being used.</p> - - <p>To support this, the driver maintains a bit associated with - each argument of whether it has been used (at all) during the - compilation. 
This bit usually doesn't need to be set by hand, - as the key ArgList accessors will set it automatically.</p> - - <p>When a compilation is successful (there are no errors), the - driver checks the bit and emits an "unused argument" warning for - any arguments which were never accessed. This is conservative - (the argument may not have been used to do what the user wanted) - but still catches the most obvious cases.</p> - - <!--=======================================================================--> - <h3><a name="int_gcc_concepts">Relation to GCC Driver Concepts</a></h3> - <!--=======================================================================--> - - <p>For those familiar with the gcc driver, this section provides - a brief overview of how things from the gcc driver map to the - clang driver.</p> - - <ul> - <li> - <b>Driver Driver</b> - <p>The driver driver is fully integrated into the clang - driver. The driver simply constructs additional Actions to - bind the architecture during the <i>Pipeline</i> - phase. The tool chain specific argument translation is - responsible for handling <tt>-Xarch_</tt>.</p> - - <p>The one caveat is that this approach - requires <tt>-Xarch_</tt> not be used to alter the - compilation itself (for example, one cannot - provide <tt>-S</tt> as an <tt>-Xarch_</tt> argument). The - driver attempts to reject such invocations, and overall - there isn't a good reason to abuse <tt>-Xarch_</tt> to - that end in practice.</p> - - <p>The upside is that the clang driver is more efficient and - does little extra work to support universal builds. It also - provides better error reporting and UI consistency.</p> - </li> - - <li> - <b>Specs</b> - <p>The clang driver has no direct correspondent for - "specs". The majority of the functionality that is - embedded in specs is in the Tool specific argument - translation routines. 
The parts of specs which control the - compilation pipeline are generally part of - the <i>Pipeline</i> stage.</p> - </li> - - <li> - <b>Toolchains</b> - <p>The gcc driver has no direct understanding of tool - chains. Each gcc binary roughly corresponds to the - information which is embedded inside a single - ToolChain.</p> - - <p>The clang driver is intended to be portable and support - complex compilation environments. All platform and tool - chain specific code should be protected behind either - abstract or well defined interfaces (such as whether the - platform supports use as a driver driver).</p> - </li> - </ul> - </div> - </body> -</html> diff --git a/docs/DriverInternals.rst b/docs/DriverInternals.rst new file mode 100644 index 0000000..c779555 --- /dev/null +++ b/docs/DriverInternals.rst @@ -0,0 +1,400 @@ +========================= +Driver Design & Internals +========================= + +.. contents:: + :local: + +Introduction +============ + +This document describes the Clang driver. The purpose of this document +is to describe both the motivation and design goals for the driver, as +well as details of the internal implementation. + +Features and Goals +================== + +The Clang driver is intended to be a production quality compiler driver +providing access to the Clang compiler and tools, with a command line +interface which is compatible with the gcc driver. + +Although the driver is part of and driven by the Clang project, it is +logically a separate tool which shares many of the same goals as Clang: + +.. contents:: Features + :local: + +GCC Compatibility +----------------- + +The number one goal of the driver is to ease the adoption of Clang by +allowing users to drop Clang into a build system which was designed to +call GCC. 
Although this makes the driver much more complicated than +might otherwise be necessary, we decided that being very compatible with +the gcc command line interface was worth it in order to allow users to +quickly test clang on their projects. + +Flexible +-------- + +The driver was designed to be flexible and easily accommodate new uses +as we grow the clang and LLVM infrastructure. As one example, the driver +can easily support the introduction of tools which have an integrated +assembler; something we hope to add to LLVM in the future. + +Similarly, most of the driver functionality is kept in a library which +can be used to build other tools which want to implement or accept a gcc +like interface. + +Low Overhead +------------ + +The driver should have as little overhead as possible. In practice, we +found that the gcc driver by itself incurred a small but meaningful +overhead when compiling many small files. The driver doesn't do much +work compared to a compilation, but we have tried to keep it as +efficient as possible by following a few simple principles: + +- Avoid memory allocation and string copying when possible. +- Don't parse arguments more than once. +- Provide a few simple interfaces for efficiently searching arguments. + +Simple +------ + +Finally, the driver was designed to be "as simple as possible", given +the other goals. Notably, trying to be completely compatible with the +gcc driver adds a significant amount of complexity. However, the design +of the driver attempts to mitigate this complexity by dividing the +process into a number of independent stages instead of a single +monolithic task. + +Internal Design and Implementation +================================== + +.. 
contents:: + :local: + :depth: 1 + +Internals Introduction +---------------------- + +In order to satisfy the stated goals, the driver was designed to +completely subsume the functionality of the gcc executable; that is, the +driver should not need to delegate to gcc to perform subtasks. On +Darwin, this implies that the Clang driver also subsumes the gcc +driver-driver, which is used to implement support for building universal +images (binaries and object files). This also implies that the driver +should be able to call the language specific compilers (e.g. cc1) +directly, which means that it must have enough information to forward +command line arguments to child processes correctly. + +Design Overview +--------------- + +The diagram below shows the significant components of the driver +architecture and how they relate to one another. The orange components +represent concrete data structures built by the driver, the green +components indicate conceptually distinct stages which manipulate these +data structures, and the blue components are important helper classes. + +.. image:: DriverArchitecture.png + :align: center + :alt: Driver Architecture Diagram + +Driver Stages +------------- + +The driver functionality is conceptually divided into five stages: + +#. **Parse: Option Parsing** + + The command line argument strings are decomposed into arguments + (``Arg`` instances). The driver expects to understand all available + options, although there is some facility for just passing certain + classes of options through (like ``-Wl,``). + + Each argument corresponds to exactly one abstract ``Option`` + definition, which describes how the option is parsed along with some + additional metadata. The Arg instances themselves are lightweight and + merely contain enough information for clients to determine which + option they correspond to and their values (if they have additional + parameters). 
+ + For example, a command line like "-Ifoo -I foo" would parse to two + Arg instances (a JoinedArg and a SeparateArg instance), but each + would refer to the same Option. + + Options are lazily created in order to avoid populating all Option + classes when the driver is loaded. Most of the driver code only needs + to deal with options by their unique ID (e.g., ``options::OPT_I``). + + Arg instances themselves do not generally store the values of + parameters. In many cases, this would simply result in creating + unnecessary string copies. Instead, Arg instances are always embedded + inside an ArgList structure, which contains the original vector of + argument strings. Each Arg itself only needs to contain an index into + this vector instead of storing its values directly. + + The clang driver can dump the results of this stage using the + ``-ccc-print-options`` flag (which must precede any actual command + line arguments). For example: + + .. code-block:: console + + $ clang -ccc-print-options -Xarch_i386 -fomit-frame-pointer -Wa,-fast -Ifoo -I foo t.c + Option 0 - Name: "-Xarch_", Values: {"i386", "-fomit-frame-pointer"} + Option 1 - Name: "-Wa,", Values: {"-fast"} + Option 2 - Name: "-I", Values: {"foo"} + Option 3 - Name: "-I", Values: {"foo"} + Option 4 - Name: "<input>", Values: {"t.c"} + + After this stage is complete the command line should be broken down + into well defined option objects with their appropriate parameters. + Subsequent stages should rarely, if ever, need to do any string + processing. + +#. **Pipeline: Compilation Job Construction** + + Once the arguments are parsed, the tree of subprocess jobs needed for + the desired compilation sequence is constructed. This involves + determining the input files and their types, what work is to be done + on them (preprocess, compile, assemble, link, etc.), and constructing + a list of Action instances for each task.
The result is a list of one + or more top-level actions, each of which generally corresponds to a + single output (for example, an object or linked executable). + + The majority of Actions correspond to actual tasks, however there are + two special Actions. The first is InputAction, which simply serves to + adapt an input argument for use as an input to other Actions. The + second is BindArchAction, which conceptually alters the architecture + to be used for all of its input Actions. + + The clang driver can dump the results of this stage using the + ``-ccc-print-phases`` flag. For example: + + .. code-block:: console + + $ clang -ccc-print-phases -x c t.c -x assembler t.s + 0: input, "t.c", c + 1: preprocessor, {0}, cpp-output + 2: compiler, {1}, assembler + 3: assembler, {2}, object + 4: input, "t.s", assembler + 5: assembler, {4}, object + 6: linker, {3, 5}, image + + Here the driver is constructing seven distinct actions, four to + compile the "t.c" input into an object file, two to assemble the + "t.s" input, and one to link them together. + + A rather different compilation pipeline is shown here; in this + example there are two top level actions to compile the input files + into two separate object files, where each object file is built using + ``lipo`` to merge results built for two separate architectures. + + .. 
code-block:: console + + $ clang -ccc-print-phases -c -arch i386 -arch x86_64 t0.c t1.c + 0: input, "t0.c", c + 1: preprocessor, {0}, cpp-output + 2: compiler, {1}, assembler + 3: assembler, {2}, object + 4: bind-arch, "i386", {3}, object + 5: bind-arch, "x86_64", {3}, object + 6: lipo, {4, 5}, object + 7: input, "t1.c", c + 8: preprocessor, {7}, cpp-output + 9: compiler, {8}, assembler + 10: assembler, {9}, object + 11: bind-arch, "i386", {10}, object + 12: bind-arch, "x86_64", {10}, object + 13: lipo, {11, 12}, object + + After this stage is complete the compilation process is divided into + a simple set of actions which need to be performed to produce + intermediate or final outputs (in some cases, like ``-fsyntax-only``, + there is no "real" final output). Phases are well known compilation + steps, such as "preprocess", "compile", "assemble", "link", etc. + +#. **Bind: Tool & Filename Selection** + + This stage (in conjunction with the Translate stage) turns the tree + of Actions into a list of actual subprocesses to run. Conceptually, the + driver performs a top down matching to assign Action(s) to Tools. The + ToolChain is responsible for selecting the tool to perform a + particular action; once selected the driver interacts with the tool + to see if it can match additional actions (for example, by having an + integrated preprocessor). + + Once Tools have been selected for all actions, the driver determines + how the tools should be connected (for example, using an inprocess + module, pipes, temporary files, or user provided filenames). If an + output file is required, the driver also computes the appropriate + file name (the suffix and file location depend on the input types and + options such as ``-save-temps``). + + The driver interacts with a ToolChain to perform the Tool bindings. + Each ToolChain contains information about all the tools needed for + compilation for a particular architecture, platform, and operating + system.
A single driver invocation may query multiple ToolChains + during one compilation in order to interact with tools for separate + architectures. + + The results of this stage are not computed directly, but the driver + can print the results via the ``-ccc-print-bindings`` option. For + example: + + .. code-block:: console + + $ clang -ccc-print-bindings -arch i386 -arch ppc t0.c + # "i386-apple-darwin9" - "clang", inputs: ["t0.c"], output: "/tmp/cc-Sn4RKF.s" + # "i386-apple-darwin9" - "darwin::Assemble", inputs: ["/tmp/cc-Sn4RKF.s"], output: "/tmp/cc-gvSnbS.o" + # "i386-apple-darwin9" - "darwin::Link", inputs: ["/tmp/cc-gvSnbS.o"], output: "/tmp/cc-jgHQxi.out" + # "ppc-apple-darwin9" - "gcc::Compile", inputs: ["t0.c"], output: "/tmp/cc-Q0bTox.s" + # "ppc-apple-darwin9" - "gcc::Assemble", inputs: ["/tmp/cc-Q0bTox.s"], output: "/tmp/cc-WCdicw.o" + # "ppc-apple-darwin9" - "gcc::Link", inputs: ["/tmp/cc-WCdicw.o"], output: "/tmp/cc-HHBEBh.out" + # "i386-apple-darwin9" - "darwin::Lipo", inputs: ["/tmp/cc-jgHQxi.out", "/tmp/cc-HHBEBh.out"], output: "a.out" + + This shows the tool chain, tool, inputs and outputs which have been + bound for this compilation sequence. Here clang is being used to + compile t0.c on the i386 architecture and darwin specific versions of + the tools are being used to assemble and link the result, but generic + gcc versions of the tools are being used on PowerPC. + +#. **Translate: Tool Specific Argument Translation** + + Once a Tool has been selected to perform a particular Action, the + Tool must construct concrete Jobs which will be executed during + compilation. The main work is in translating from the gcc style + command line options to whatever options the subprocess expects. + + Some tools, such as the assembler, only interact with a handful of + arguments and just determine the path of the executable to call and + pass on their input and output arguments. 
Others, like the compiler + or the linker, may translate a large number of arguments in addition. + + The ArgList class provides a number of simple helper methods to + assist with translating arguments; for example, to pass on only the + last of arguments corresponding to some option, or all arguments for + an option. + + The result of this stage is a list of Jobs (executable paths and + argument strings) to execute. + +#. **Execute** + + Finally, the compilation pipeline is executed. This is mostly + straightforward, although there is some interaction with options like + ``-pipe``, ``-pass-exit-codes`` and ``-time``. + +Additional Notes +---------------- + +The Compilation Object +^^^^^^^^^^^^^^^^^^^^^^ + +The driver constructs a Compilation object for each set of command line +arguments. The Driver itself is intended to be invariant during +construction of a Compilation; an IDE should be able to construct a +single long lived driver instance to use for an entire build, for +example. + +The Compilation object holds information that is particular to each +compilation sequence. For example, the list of used temporary files +(which must be removed once compilation is finished) and result files +(which should be removed if compilation fails). + +Unified Parsing & Pipelining +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Parsing and pipelining both occur without reference to a Compilation +instance. This is by design; the driver expects that both of these +phases are platform neutral, with a few very well defined exceptions +such as whether the platform uses a driver driver. + +ToolChain Argument Translation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In order to match gcc very closely, the clang driver currently allows +tool chains to perform their own translation of the argument list (into +a new ArgList data structure). 
Although this allows the clang driver to +match gcc easily, it also makes the driver operation much harder to +understand (since the Tools stop seeing some arguments the user +provided, and see new ones instead). + +For example, on Darwin ``-gfull`` gets translated into two separate +arguments, ``-g`` and ``-fno-eliminate-unused-debug-symbols``. Trying to +write Tool logic to do something with ``-gfull`` will not work, because +Tool argument translation is done after the arguments have been +translated. + +A long term goal is to remove this tool chain specific translation, and +instead force each tool to change its own logic to do the right thing on +the untranslated original arguments. + +Unused Argument Warnings +^^^^^^^^^^^^^^^^^^^^^^^^ + +The driver operates by parsing all arguments but giving Tools the +opportunity to choose which arguments to pass on. One downside of this +infrastructure is that if the user misspells some option, or is confused +about which options to use, some command line arguments the user really +cared about may go unused. This problem is particularly important when +using clang as a compiler, since the clang compiler does not support +anywhere near all the options that gcc does, and we want to make sure +users know which ones are being used. + +To support this, the driver maintains a bit associated with each +argument of whether it has been used (at all) during the compilation. +This bit usually doesn't need to be set by hand, as the key ArgList +accessors will set it automatically. + +When a compilation is successful (there are no errors), the driver +checks the bit and emits an "unused argument" warning for any arguments +which were never accessed. This is conservative (the argument may not +have been used to do what the user wanted) but still catches the most +obvious cases. 
+ +Relation to GCC Driver Concepts +------------------------------- + +For those familiar with the gcc driver, this section provides a brief +overview of how things from the gcc driver map to the clang driver. + +- **Driver Driver** + + The driver driver is fully integrated into the clang driver. The + driver simply constructs additional Actions to bind the architecture + during the *Pipeline* phase. The tool chain specific argument + translation is responsible for handling ``-Xarch_``. + + The one caveat is that this approach requires ``-Xarch_`` not be used + to alter the compilation itself (for example, one cannot provide + ``-S`` as an ``-Xarch_`` argument). The driver attempts to reject + such invocations, and overall there isn't a good reason to abuse + ``-Xarch_`` to that end in practice. + + The upside is that the clang driver is more efficient and does little + extra work to support universal builds. It also provides better error + reporting and UI consistency. + +- **Specs** + + The clang driver has no direct correspondent for "specs". The + majority of the functionality that is embedded in specs is in the + Tool specific argument translation routines. The parts of specs which + control the compilation pipeline are generally part of the *Pipeline* + stage. + +- **Toolchains** + + The gcc driver has no direct understanding of tool chains. Each gcc + binary roughly corresponds to the information which is embedded + inside a single ToolChain. + + The clang driver is intended to be portable and support complex + compilation environments. All platform and tool chain specific code + should be protected behind either abstract or well defined interfaces + (such as whether the platform supports use as a driver driver). 
diff --git a/docs/ExternalClangExamples.rst b/docs/ExternalClangExamples.rst new file mode 100644 index 0000000..c7fd4c5 --- /dev/null +++ b/docs/ExternalClangExamples.rst @@ -0,0 +1,80 @@ +======================= +External Clang Examples +======================= + +Introduction +============ + +This page provides some examples of the kinds of things that people have +done with Clang that might serve as useful guides (or starting points) from +which to develop your own tools. They may be helpful even for something as +banal (but necessary) as how to set up your build to integrate Clang. + +Clang's library-based design is deliberately aimed at facilitating use by +external projects, and we are always interested in improving Clang to +better serve our external users. Some typical categories of applications +where Clang is used are: + +- Static analysis. +- Documentation/cross-reference generation. + +If you know of (or wrote!) a tool or project using Clang, please send an +email to Clang's `development discussion mailing list +<http://lists.cs.uiuc.edu/mailman/listinfo/cfe-dev>`_ to have it added. +(or if you are already a Clang contributor, feel free to directly commit +additions). Since the primary purpose of this page is to provide examples +that can help developers, generally they must have code available. + +List of projects and tools +========================== + +`<https://github.com/Andersbakken/rtags/>`_ + "RTags is a client/server application that indexes c/c++ code and keeps + a persistent in-memory database of references, symbolnames, completions + etc." + +`<http://rprichard.github.com/sourceweb/>`_ + "A C/C++ source code indexer and navigator" + +`<https://github.com/etaoins/qconnectlint>`_ + "qconnectlint is a Clang tool for statically verifying the consistency + of signal and slot connections made with Qt's ``QObject::connect``." 
+ +`<https://github.com/woboq/woboq_codebrowser>`_ + "The Woboq Code Browser is a web-based code browser for C/C++ projects. + Check out `<http://code.woboq.org/>`_ for an example!" + +`<https://github.com/mozilla/dxr>`_ + "DXR is a source code cross-reference tool that uses static analysis + data collected by instrumented compilers." + +`<https://github.com/eschulte/clang-mutate>`_ + "This tool performs a number of operations on C-language source files." + +`<https://github.com/gmarpons/Crisp>`_ + "A coding rule validation add-on for LLVM/clang. Crisp rules are written + in Prolog. A high-level declarative DSL to easily write new rules is under + development. It will be called CRISP, an acronym for *Coding Rules in + Sugared Prolog*." + +`<https://github.com/drothlis/clang-ctags>`_ + "Generate tag file for C++ source code." + +`<https://github.com/exclipy/clang_indexer>`_ + "This is an indexer for C and C++ based on the libclang library." + +`<https://github.com/holtgrewe/linty>`_ + "Linty - C/C++ Style Checking with Python & libclang." + +`<https://github.com/axw/cmonster>`_ + "cmonster is a Python wrapper for the Clang C++ parser." + +`<https://github.com/rizsotto/Constantine>`_ + "Constantine is a toy project to learn how to write clang plugin. + Implements pseudo const analysis. Generates warnings about variables, + which were declared without const qualifier." + +`<https://github.com/jessevdk/cldoc>`_ + "cldoc is a Clang based documentation generator for C and C++. + cldoc tries to solve the issue of writing C/C++ software documentation + with a modern, non-intrusive and robust approach." diff --git a/docs/FAQ.rst b/docs/FAQ.rst new file mode 100644 index 0000000..4c4f8a8 --- /dev/null +++ b/docs/FAQ.rst @@ -0,0 +1,64 @@ +================================ +Frequently Asked Questions (FAQ) +================================ + +.. 
contents:: + :local: + +Driver +====== + +I run ``clang -cc1 ...`` and get weird errors about missing headers +------------------------------------------------------------------- + +Given this source file: + +.. code-block:: c + + #include <stdio.h> + + int main() { + printf("Hello world\n"); + } + + +If you run: + +.. code-block:: console + + $ clang -cc1 hello.c + hello.c:1:10: fatal error: 'stdio.h' file not found + #include <stdio.h> + ^ + 1 error generated. + +``clang -cc1`` is the frontend, ``clang`` is the :doc:`driver +<DriverInternals>`. The driver invokes the frontend with options appropriate +for your system. To see these options, run: + +.. code-block:: console + + $ clang -### -c hello.c + +Some clang command line options are driver-only options, some are frontend-only +options. Frontend-only options are intended to be used only by clang developers. +Users should not run ``clang -cc1`` directly, because ``-cc1`` options are not +guaranteed to be stable. + +If you want to use a frontend-only option ("a ``-cc1`` option"), for example +``-ast-dump``, then you need to take the ``clang -cc1`` line generated by the +driver and add the option you need. Alternatively, you can run +``clang -Xclang <option> ...`` to force the driver to pass ``<option>`` to +``clang -cc1``. + +I get errors about some headers being missing (``stddef.h``, ``stdarg.h``) +-------------------------------------------------------------------------- + +Some header files (``stddef.h``, ``stdarg.h``, and others) are shipped with +Clang --- these are called builtin includes. Clang searches for them in a +directory relative to the location of the ``clang`` binary. If you moved the +``clang`` binary, you need to move the builtin headers, too. + +More information can be found in the :ref:`libtooling_builtin_includes` +section.
+ diff --git a/docs/HowToSetupToolingForLLVM.html b/docs/HowToSetupToolingForLLVM.html deleted file mode 100644 index 022ed9c..0000000 --- a/docs/HowToSetupToolingForLLVM.html +++ /dev/null @@ -1,212 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> -<title>How To Setup Clang Tooling For LLVM</title> -<link type="text/css" rel="stylesheet" href="../menu.css"> -<link type="text/css" rel="stylesheet" href="../content.css"> -</head> -<body> - -<!--#include virtual="../menu.html.incl"--> - -<div id="content"> - -<h1>How To Setup Clang Tooling For LLVM</h1> -<p>Clang Tooling provides infrastructure to write tools that need syntactic and -semantic information about a program. This term also relates to a set of specific -tools using this infrastructure (e.g. <code>clang-check</code>). This document -provides information on how to set up and use Clang Tooling for the LLVM source -code.</p> - - -<!-- ======================================================================= --> -<h2><a name="introduction">Introduction</a></h2> -<!-- ======================================================================= --> - -<p>Clang Tooling needs a compilation database to figure out specific build -options for each file. Currently it can create a compilation database from the -<code>compile_commands.json</code> file, generated by CMake. When invoking -clang tools, you can either specify a path to a build directory using a command -line parameter <code>-p</code> or let Clang Tooling find this file in your -source tree.
In either case you need to configure your build using CMake to use -clang tools.</p> - -<!-- ======================================================================= --> -<h2><a name="using-make">Setup Clang Tooling Using CMake and Make</a></h2> -<!-- ======================================================================= --> - -<p>If you intend to use make to build LLVM, you should have CMake 2.8.6 or later -installed (can be found <a href="http://cmake.org">here</a>).</p> -<p>First, you need to generate Makefiles for LLVM with CMake. You need to make -a build directory and run CMake from it:</p> -<pre> - mkdir your/build/directory - cd your/build/directory - cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON path/to/llvm/sources -</pre> - -<p>If you want to use clang instead of GCC, you can add -<code>-DCMAKE_C_COMPILER=/path/to/clang - -DCMAKE_CXX_COMPILER=/path/to/clang++</code>. -You can also use ccmake, which provides a curses interface to configure CMake -variables for lazy people.</p> - -<p>As a result, the new <code>compile_commands.json</code> file should appear in -the current directory. You should link it to the LLVM source tree so that Clang -Tooling is able to use it:</p> -<pre> - ln -s $PWD/compile_commands.json path/to/llvm/source/ -</pre> - -<p>Now you are ready to build and test LLVM using make:</p> -<pre> - make check-all -</pre> - -<!-- ======================================================================= --> -<h2><a name="using-tools">Using Clang Tools</a></h2> -<!-- ======================================================================= --> - -<p>After you completed the previous steps, you are ready to run clang tools. If -you have a recent clang installed, you should have <code>clang-check</code> in -$PATH. Try to run it on any .cpp file inside the LLVM source tree:</p> -<pre> - clang-check tools/clang/lib/Tooling/CompilationDatabase.cpp -</pre> -<p>If you're using vim, it's convenient to have clang-check integrated. 
Put this -into your .vimrc:</p> -<pre> -function! ClangCheckImpl(cmd) - if &autowrite | wall | endif - echo "Running " . a:cmd . " ..." - let l:output = system(a:cmd) - cexpr l:output - cwindow - let w:quickfix_title = a:cmd - if v:shell_error != 0 - cc - endif - let g:clang_check_last_cmd = a:cmd -endfunction - -function! ClangCheck() - let l:filename = expand('%') - if l:filename =~ '\.\(cpp\|cxx\|cc\|c\)$' - call ClangCheckImpl("clang-check " . l:filename) - elseif exists("g:clang_check_last_cmd") - call ClangCheckImpl(g:clang_check_last_cmd) - else - echo "Can't detect file's compilation arguments and no previous clang-check invocation!" - endif -endfunction - -nmap <silent> <F5> :call ClangCheck()<CR><CR> -</pre> - -<p>When editing a .cpp/.cxx/.cc/.c file, hit F5 to reparse the file. In case -the current file has a different extension (for example, .h), F5 will re-run -the last clang-check invocation made from this vim instance (if any). The -output will go into the error window, which is opened automatically when -clang-check finds errors, and can be re-opened with <code>:cope</code>.</p> - -<p>Other <code>clang-check</code> options that can be useful when working with -clang AST:</p> -<ul> - <li><code>-ast-print</code> - Build ASTs and then pretty-print them.</li> - <li><code>-ast-dump</code> - Build ASTs and then debug dump them.</li> - <li><code>-ast-dump-filter=<string></code> - Use with - <code>-ast-dump</code> or <code>-ast-print</code> to dump/print - only AST declaration nodes having a certain substring in a qualified name. - Use <code>-ast-list</code> to list all filterable declaration node - names.</li> - <li><code>-ast-list</code> - Build ASTs and print the list of declaration - node qualified names.</li> -</ul> -<p>Examples:</p> -<pre> -<b>$ clang-check tools/clang/tools/clang-check/ClangCheck.cpp -ast-dump -ast-dump-filter ActionFactory::newASTConsumer</b> -Processing: tools/clang/tools/clang-check/ClangCheck.cpp. 
-Dumping <anonymous namespace>::ActionFactory::newASTConsumer: -clang::ASTConsumer *newASTConsumer() (CompoundStmt 0x44da290 </home/alexfh/local/llvm/tools/clang/tools/clang-check/ClangCheck.cpp:64:40, line:72:3> - (IfStmt 0x44d97c8 <line:65:5, line:66:45> - <<<NULL>>> - (ImplicitCastExpr 0x44d96d0 <line:65:9> '_Bool':'_Bool' <UserDefinedConversion> -... -<b>$ clang-check tools/clang/tools/clang-check/ClangCheck.cpp -ast-print -ast-dump-filter ActionFactory::newASTConsumer</b> -Processing: tools/clang/tools/clang-check/ClangCheck.cpp. -Printing <anonymous namespace>::ActionFactory::newASTConsumer: -clang::ASTConsumer *newASTConsumer() { - if (this->ASTList.operator _Bool()) - return clang::CreateASTDeclNodeLister(); - if (this->ASTDump.operator _Bool()) - return clang::CreateASTDumper(this->ASTDumpFilter); - if (this->ASTPrint.operator _Bool()) - return clang::CreateASTPrinter(&llvm::outs(), this->ASTDumpFilter); - return new clang::ASTConsumer(); -} -</pre> - -<!-- ======================================================================= --> -<h2><a name="using-ninja">(Experimental) Using Ninja Build System</a></h2> -<!-- ======================================================================= --> - -<p>Optionally you can use the <a - href="https://github.com/martine/ninja">Ninja</a> build system instead of -make. It is aimed at making your builds faster. Currently this step will require -building Ninja from sources and using a development version of CMake.</p> -<p>To take advantage of using Clang Tools along with Ninja build you need at -least CMake 2.8.9. 
At the moment CMake 2.8.9 is still under development, so you -can get latest development sources and build it yourself:</p> -<pre> - git clone git://cmake.org/cmake.git - cd cmake - ./bootstrap - make - sudo make install -</pre> - -<p>Having the correct version of CMake, you can clone the Ninja git repository -and build Ninja from sources:</p> -<pre> - git clone git://github.com/martine/ninja.git - cd ninja/ - ./bootstrap.py -</pre> -<p>This will result in a single binary <code>ninja</code> in the current -directory. It doesn't require installation and can just be copied to any -location inside <code>$PATH</code>, say <code>/usr/local/bin/</code>:</p> -<pre> - sudo cp ninja /usr/local/bin/ - sudo chmod a+rx /usr/local/bin/ninja -</pre> -<p>After doing all of this, you'll need to generate Ninja build files for LLVM -with CMake. You need to make a build directory and run CMake from it:</p> -<pre> - mkdir your/build/directory - cd your/build/directory - cmake -G Ninja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON path/to/llvm/sources -</pre> - -<p>If you want to use clang instead of GCC, you can add -<code>-DCMAKE_C_COMPILER=/path/to/clang - -DCMAKE_CXX_COMPILER=/path/to/clang++</code>. -You can also use ccmake, which provides a curses interface to configure CMake -variables in an interactive manner.</p> - -<p>As a result, the new <code>compile_commands.json</code> file should appear in -the current directory. 
You should link it to the LLVM source tree so that Clang -Tooling is able to use it:</p> -<pre> - ln -s $PWD/compile_commands.json path/to/llvm/source/ -</pre> - -<p>Now you are ready to build and test LLVM using Ninja:</p> -<pre> - ninja check-all -</pre> -<p>Other target names can be used in the same way as with make.</p> -</div> -</body> -</html> - diff --git a/docs/HowToSetupToolingForLLVM.rst b/docs/HowToSetupToolingForLLVM.rst new file mode 100644 index 0000000..9247742 --- /dev/null +++ b/docs/HowToSetupToolingForLLVM.rst @@ -0,0 +1,199 @@ +=================================== +How To Setup Clang Tooling For LLVM +=================================== + +Clang Tooling provides infrastructure to write tools that need syntactic +and semantic information about a program. This term also relates to a set +of specific tools using this infrastructure (e.g. ``clang-check``). This +document provides information on how to set up and use Clang Tooling for +the LLVM source code. + +Introduction +============ + +Clang Tooling needs a compilation database to figure out specific build +options for each file. Currently it can create a compilation database +from the ``compile_commands.json`` file, generated by CMake. When +invoking clang tools, you can either specify a path to a build directory +using a command line parameter ``-p`` or let Clang Tooling find this +file in your source tree. In either case you need to configure your +build using CMake to use clang tools. + +Setup Clang Tooling Using CMake and Make +======================================== + +If you intend to use make to build LLVM, you should have CMake 2.8.6 or +later installed (can be found `here <http://cmake.org>`_). + +First, you need to generate Makefiles for LLVM with CMake. You need to +make a build directory and run CMake from it: + +..
code-block:: console + + $ mkdir your/build/directory + $ cd your/build/directory + $ cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON path/to/llvm/sources + +If you want to use clang instead of GCC, you can add +``-DCMAKE_C_COMPILER=/path/to/clang -DCMAKE_CXX_COMPILER=/path/to/clang++``. +You can also use ``ccmake``, which provides a curses interface to configure +CMake variables for lazy people. + +As a result, the new ``compile_commands.json`` file should appear in the +current directory. You should link it to the LLVM source tree so that +Clang Tooling is able to use it: + +.. code-block:: console + + $ ln -s $PWD/compile_commands.json path/to/llvm/source/ + +Now you are ready to build and test LLVM using make: + +.. code-block:: console + + $ make check-all + +Using Clang Tools +================= + +After you completed the previous steps, you are ready to run clang tools. If +you have a recent clang installed, you should have ``clang-check`` in +``$PATH``. Try to run it on any ``.cpp`` file inside the LLVM source tree: + +.. code-block:: console + + $ clang-check tools/clang/lib/Tooling/CompilationDatabase.cpp + +If you're using vim, it's convenient to have clang-check integrated. Put +this into your ``.vimrc``: + +:: + + function! ClangCheckImpl(cmd) + if &autowrite | wall | endif + echo "Running " . a:cmd . " ..." + let l:output = system(a:cmd) + cexpr l:output + cwindow + let w:quickfix_title = a:cmd + if v:shell_error != 0 + cc + endif + let g:clang_check_last_cmd = a:cmd + endfunction + + function! ClangCheck() + let l:filename = expand('%') + if l:filename =~ '\.\(cpp\|cxx\|cc\|c\)$' + call ClangCheckImpl("clang-check " . l:filename) + elseif exists("g:clang_check_last_cmd") + call ClangCheckImpl(g:clang_check_last_cmd) + else + echo "Can't detect file's compilation arguments and no previous clang-check invocation!" + endif + endfunction + + nmap <silent> <F5> :call ClangCheck()<CR><CR> + +When editing a .cpp/.cxx/.cc/.c file, hit F5 to reparse the file. 
In +case the current file has a different extension (for example, .h), F5 +will re-run the last clang-check invocation made from this vim instance +(if any). The output will go into the error window, which is opened +automatically when clang-check finds errors, and can be re-opened with +``:cope``. + +Other ``clang-check`` options that can be useful when working with clang +AST: + +* ``-ast-print`` --- Build ASTs and then pretty-print them. +* ``-ast-dump`` --- Build ASTs and then debug dump them. +* ``-ast-dump-filter=<string>`` --- Use with ``-ast-dump`` or ``-ast-print`` to + dump/print only AST declaration nodes having a certain substring in a + qualified name. Use ``-ast-list`` to list all filterable declaration node + names. +* ``-ast-list`` --- Build ASTs and print the list of declaration node qualified + names. + +Examples: + +.. code-block:: console + + $ clang-check tools/clang/tools/clang-check/ClangCheck.cpp -ast-dump -ast-dump-filter ActionFactory::newASTConsumer + Processing: tools/clang/tools/clang-check/ClangCheck.cpp. + Dumping <anonymous namespace>::ActionFactory::newASTConsumer: + clang::ASTConsumer *newASTConsumer() (CompoundStmt 0x44da290 </home/alexfh/local/llvm/tools/clang/tools/clang-check/ClangCheck.cpp:64:40, line:72:3> + (IfStmt 0x44d97c8 <line:65:5, line:66:45> + <<<NULL>>> + (ImplicitCastExpr 0x44d96d0 <line:65:9> '_Bool':'_Bool' <UserDefinedConversion> + ... + $ clang-check tools/clang/tools/clang-check/ClangCheck.cpp -ast-print -ast-dump-filter ActionFactory::newASTConsumer + Processing: tools/clang/tools/clang-check/ClangCheck.cpp.
+ Printing <anonymous namespace>::ActionFactory::newASTConsumer: + clang::ASTConsumer *newASTConsumer() { + if (this->ASTList.operator _Bool()) + return clang::CreateASTDeclNodeLister(); + if (this->ASTDump.operator _Bool()) + return clang::CreateASTDumper(this->ASTDumpFilter); + if (this->ASTPrint.operator _Bool()) + return clang::CreateASTPrinter(&llvm::outs(), this->ASTDumpFilter); + return new clang::ASTConsumer(); + } + +(Experimental) Using Ninja Build System +======================================= + +Optionally you can use the `Ninja <https://github.com/martine/ninja>`_ +build system instead of make. It is aimed at making your builds faster. +Currently this step will require building Ninja from sources. + +To take advantage of using Clang Tools along with Ninja build you need +at least CMake 2.8.9. + +Clone the Ninja git repository and build Ninja from sources: + +.. code-block:: console + + $ git clone git://github.com/martine/ninja.git + $ cd ninja/ + $ ./bootstrap.py + +This will result in a single binary ``ninja`` in the current directory. +It doesn't require installation and can just be copied to any location +inside ``$PATH``, say ``/usr/local/bin/``: + +.. code-block:: console + + $ sudo cp ninja /usr/local/bin/ + $ sudo chmod a+rx /usr/local/bin/ninja + +After doing all of this, you'll need to generate Ninja build files for +LLVM with CMake. You need to make a build directory and run CMake from +it: + +.. code-block:: console + + $ mkdir your/build/directory + $ cd your/build/directory + $ cmake -G Ninja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON path/to/llvm/sources + +If you want to use clang instead of GCC, you can add +``-DCMAKE_C_COMPILER=/path/to/clang -DCMAKE_CXX_COMPILER=/path/to/clang++``. +You can also use ``ccmake``, which provides a curses interface to configure +CMake variables in an interactive manner. + +As a result, the new ``compile_commands.json`` file should appear in the +current directory. 
You should link it to the LLVM source tree so that +Clang Tooling is able to use it: + +.. code-block:: console + + $ ln -s $PWD/compile_commands.json path/to/llvm/source/ + +Now you are ready to build and test LLVM using Ninja: + +.. code-block:: console + + $ ninja check-all + +Other target names can be used in the same way as with make. + diff --git a/docs/InternalsManual.html b/docs/InternalsManual.html deleted file mode 100644 index 57f0631..0000000 --- a/docs/InternalsManual.html +++ /dev/null @@ -1,2019 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> -<title>"Clang" CFE Internals Manual</title> -<link type="text/css" rel="stylesheet" href="../menu.css"> -<link type="text/css" rel="stylesheet" href="../content.css"> -<style type="text/css"> -td { - vertical-align: top; -} -</style> -</head> -<body> - -<!--#include virtual="../menu.html.incl"--> - -<div id="content"> - -<h1>"Clang" CFE Internals Manual</h1> - -<ul> -<li><a href="#intro">Introduction</a></li> -<li><a href="#libsupport">LLVM Support Library</a></li> -<li><a href="#libbasic">The Clang 'Basic' Library</a> - <ul> - <li><a href="#Diagnostics">The Diagnostics Subsystem</a></li> - <li><a href="#SourceLocation">The SourceLocation and SourceManager - classes</a></li> - <li><a href="#SourceRange">SourceRange and CharSourceRange</a></li> - </ul> -</li> -<li><a href="#libdriver">The Driver Library</a> -</li> -<li><a href="#pch">Precompiled Headers</a> -<li><a href="#libfrontend">The Frontend Library</a> -</li> -<li><a href="#liblex">The Lexer and Preprocessor Library</a> - <ul> - <li><a href="#Token">The Token class</a></li> - <li><a href="#Lexer">The Lexer class</a></li> - <li><a href="#AnnotationToken">Annotation Tokens</a></li> - <li><a href="#TokenLexer">The TokenLexer class</a></li> - <li><a href="#MultipleIncludeOpt">The MultipleIncludeOpt class</a></li> - </ul> -</li> -<li><a href="#libparse">The Parser Library</a> -</li> -<li><a 
href="#libast">The AST Library</a> - <ul> - <li><a href="#Type">The Type class and its subclasses</a></li> - <li><a href="#QualType">The QualType class</a></li> - <li><a href="#DeclarationName">Declaration names</a></li> - <li><a href="#DeclContext">Declaration contexts</a> - <ul> - <li><a href="#Redeclarations">Redeclarations and Overloads</a></li> - <li><a href="#LexicalAndSemanticContexts">Lexical and Semantic - Contexts</a></li> - <li><a href="#TransparentContexts">Transparent Declaration Contexts</a></li> - <li><a href="#MultiDeclContext">Multiply-Defined Declaration Contexts</a></li> - </ul> - </li> - <li><a href="#CFG">The CFG class</a></li> - <li><a href="#Constants">Constant Folding in the Clang AST</a></li> - </ul> -</li> -<li><a href="#Howtos">Howto guides</a> - <ul> - <li><a href="#AddingAttributes">How to add an attribute</a></li> - <li><a href="#AddingExprStmt">How to add a new expression or statement</a></li> - </ul> -</li> -</ul> - - -<!-- ======================================================================= --> -<h2 id="intro">Introduction</h2> -<!-- ======================================================================= --> - -<p>This document describes some of the more important APIs and internal design -decisions made in the Clang C front-end. The purpose of this document is to -both capture some of this high level information and also describe some of the -design decisions behind it. This is meant for people interested in hacking on -Clang, not for end-users. 
The description below is categorized by -libraries, and does not describe any of the clients of the libraries.</p> - -<!-- ======================================================================= --> -<h2 id="libsupport">LLVM Support Library</h2> -<!-- ======================================================================= --> - -<p>The LLVM libsupport library provides many underlying libraries and -<a href="http://llvm.org/docs/ProgrammersManual.html">data-structures</a>, -including command line option processing, various containers and a system -abstraction layer, which is used for file system access.</p> - -<!-- ======================================================================= --> -<h2 id="libbasic">The Clang 'Basic' Library</h2> -<!-- ======================================================================= --> - -<p>This library certainly needs a better name. The 'basic' library contains a -number of low-level utilities for tracking and manipulating source buffers, -locations within the source buffers, diagnostics, tokens, target abstraction, -and information about the subset of the language being compiled for.</p> - -<p>Part of this infrastructure is specific to C (such as the TargetInfo class), -other parts could be reused for other non-C-based languages (SourceLocation, -SourceManager, Diagnostics, FileManager). When and if there is future demand -we can figure out if it makes sense to introduce a new library, move the general -classes somewhere else, or introduce some other solution.</p> - -<p>We describe the roles of these classes in order of their dependencies.</p> - - -<!-- ======================================================================= --> -<h3 id="Diagnostics">The Diagnostics Subsystem</h3> -<!-- ======================================================================= --> - -<p>The Clang Diagnostics subsystem is an important part of how the compiler -communicates with the human. 
Diagnostics are the warnings and errors produced -when the code is incorrect or dubious. In Clang, each diagnostic produced has -(at the minimum) a unique ID, an English translation associated with it, a <a -href="#SourceLocation">SourceLocation</a> to "put the caret", and a severity (e.g. -<tt>WARNING</tt> or <tt>ERROR</tt>). They can also optionally include a number -of arguments to the dianostic (which fill in "%0"'s in the string) as well as a -number of source ranges that related to the diagnostic.</p> - -<p>In this section, we'll be giving examples produced by the Clang command line -driver, but diagnostics can be <a href="#DiagnosticClient">rendered in many -different ways</a> depending on how the DiagnosticClient interface is -implemented. A representative example of a diagnostic is:</p> - -<pre> -t.c:38:15: error: invalid operands to binary expression ('int *' and '_Complex float') - <span style="color:darkgreen">P = (P-42) + Gamma*4;</span> - <span style="color:blue">~~~~~~ ^ ~~~~~~~</span> -</pre> - -<p>In this example, you can see the English translation, the severity (error), -you can see the source location (the caret ("^") and file/line/column info), -the source ranges "~~~~", arguments to the diagnostic ("int*" and "_Complex -float"). You'll have to believe me that there is a unique ID backing the -diagnostic :).</p> - -<p>Getting all of this to happen has several steps and involves many moving -pieces, this section describes them and talks about best practices when adding -a new diagnostic.</p> - -<!-- ============================= --> -<h4>The Diagnostic*Kinds.td files</h4> -<!-- ============================= --> - -<p>Diagnostics are created by adding an entry to one of the <tt> -clang/Basic/Diagnostic*Kinds.td</tt> files, depending on what library will -be using it. 
From this file, tblgen generates the unique ID of the diagnostic, -the severity of the diagnostic and the English translation + format string.</p> - -<p>There is little sanity with the naming of the unique ID's right now. Some -start with err_, warn_, ext_ to encode the severity into the name. Since the -enum is referenced in the C++ code that produces the diagnostic, it is somewhat -useful for it to be reasonably short.</p> - -<p>The severity of the diagnostic comes from the set {<tt>NOTE</tt>, -<tt>WARNING</tt>, <tt>EXTENSION</tt>, <tt>EXTWARN</tt>, <tt>ERROR</tt>}. The -<tt>ERROR</tt> severity is used for diagnostics indicating the program is never -acceptable under any circumstances. When an error is emitted, the AST for the -input code may not be fully built. The <tt>EXTENSION</tt> and <tt>EXTWARN</tt> -severities are used for extensions to the language that Clang accepts. This -means that Clang fully understands and can represent them in the AST, but we -produce diagnostics to tell the user their code is non-portable. The difference -is that the former are ignored by default, and the later warn by default. The -<tt>WARNING</tt> severity is used for constructs that are valid in the currently -selected source language but that are dubious in some way. The <tt>NOTE</tt> -level is used to staple more information onto previous diagnostics.</p> - -<p>These <em>severities</em> are mapped into a smaller set (the -Diagnostic::Level enum, {<tt>Ignored</tt>, <tt>Note</tt>, <tt>Warning</tt>, -<tt>Error</tt>, <tt>Fatal</tt> }) of output <em>levels</em> by the diagnostics -subsystem based on various configuration options. Clang internally supports a -fully fine grained mapping mechanism that allows you to map almost any -diagnostic to the output level that you want. 
The only diagnostics that cannot -be mapped are <tt>NOTE</tt>s, which always follow the severity of the previously -emitted diagnostic and <tt>ERROR</tt>s, which can only be mapped to -<tt>Fatal</tt> (it is not possible to turn an error into a warning, -for example).</p> - -<p>Diagnostic mappings are used in many ways. For example, if the user -specifies <tt>-pedantic</tt>, <tt>EXTENSION</tt> maps to <tt>Warning</tt>, if -they specify <tt>-pedantic-errors</tt>, it turns into <tt>Error</tt>. This is -used to implement options like <tt>-Wunused_macros</tt>, <tt>-Wundef</tt> etc. -</p> - -<p> -Mapping to <tt>Fatal</tt> should only be used for diagnostics that are -considered so severe that error recovery won't be able to recover sensibly from -them (thus spewing a ton of bogus errors). One example of this class of error -are failure to #include a file. -</p> - -<!-- ================= --> -<h4>The Format String</h4> -<!-- ================= --> - -<p>The format string for the diagnostic is very simple, but it has some power. -It takes the form of a string in English with markers that indicate where and -how arguments to the diagnostic are inserted and formatted. For example, here -are some simple format strings:</p> - -<pre> - "binary integer literals are an extension" - "format string contains '\\0' within the string body" - "more '<b>%%</b>' conversions than data arguments" - "invalid operands to binary expression (<b>%0</b> and <b>%1</b>)" - "overloaded '<b>%0</b>' must be a <b>%select{unary|binary|unary or binary}2</b> operator" - " (has <b>%1</b> parameter<b>%s1</b>)" -</pre> - -<p>These examples show some important points of format strings. You can use any - plain ASCII character in the diagnostic string except "%" without a problem, - but these are C strings, so you have to use and be aware of all the C escape - sequences (as in the second example). If you want to produce a "%" in the - output, use the "%%" escape sequence, like the third diagnostic. 
Finally, - Clang uses the "%...[digit]" sequences to specify where and how arguments to - the diagnostic are formatted.</p> - -<p>Arguments to the diagnostic are numbered according to how they are specified - by the C++ code that <a href="#producingdiag">produces them</a>, and are - referenced by <tt>%0</tt> .. <tt>%9</tt>. If you have more than 10 arguments - to your diagnostic, you are doing something wrong :). Unlike printf, there - is no requirement that arguments to the diagnostic end up in the output in - the same order as they are specified, you could have a format string with - <tt>"%1 %0"</tt> that swaps them, for example. The text in between the - percent and digit are formatting instructions. If there are no instructions, - the argument is just turned into a string and substituted in.</p> - -<p>Here are some "best practices" for writing the English format string:</p> - -<ul> -<li>Keep the string short. It should ideally fit in the 80 column limit of the - <tt>DiagnosticKinds.td</tt> file. This avoids the diagnostic wrapping when - printed, and forces you to think about the important point you are conveying - with the diagnostic.</li> -<li>Take advantage of location information. The user will be able to see the - line and location of the caret, so you don't need to tell them that the - problem is with the 4th argument to the function: just point to it.</li> -<li>Do not capitalize the diagnostic string, and do not end it with a - period.</li> -<li>If you need to quote something in the diagnostic string, use single - quotes.</li> -</ul> - -<p>Diagnostics should never take random English strings as arguments: you -shouldn't use <tt>"you have a problem with %0"</tt> and pass in things like -<tt>"your argument"</tt> or <tt>"your return value"</tt> as arguments. Doing -this prevents <a href="#translation">translating</a> the Clang diagnostics to -other languages (because they'll get random English words in their otherwise -localized diagnostic). 
The exceptions to this are C/C++ language keywords -(e.g. auto, const, mutable, etc) and C/C++ operators (<tt>/=</tt>). Note -that things like "pointer" and "reference" are not keywords. On the other -hand, you <em>can</em> include anything that comes from the user's source code, -including variable names, types, labels, etc. The 'select' format can be -used to achieve this sort of thing in a localizable way, see below.</p> - -<!-- ==================================== --> -<h4>Formatting a Diagnostic Argument</h4> -<!-- ==================================== --> - -<p>Arguments to diagnostics are fully typed internally, and come from a couple -different classes: integers, types, names, and random strings. Depending on -the class of the argument, it can be optionally formatted in different ways. -This gives the DiagnosticClient information about what the argument means -without requiring it to use a specific presentation (consider this MVC for -Clang :).</p> - -<p>Here are the different diagnostic argument formats currently supported by -Clang:</p> - -<table> -<tr><td colspan="2"><b>"s" format</b></td></tr> -<tr><td>Example:</td><td><tt>"requires %1 parameter%s1"</tt></td></tr> -<tr><td>Class:</td><td>Integers</td></tr> -<tr><td>Description:</td><td>This is a simple formatter for integers that is - useful when producing English diagnostics. When the integer is 1, it prints - as nothing. When the integer is not 1, it prints as "s". 
This allows some - simple grammatical forms to be to be handled correctly, and eliminates the - need to use gross things like <tt>"requires %1 parameter(s)"</tt>.</td></tr> - -<tr><td colspan="2"><b>"select" format</b></td></tr> -<tr><td>Example:</td><td><tt>"must be a %select{unary|binary|unary or binary}2 - operator"</tt></td></tr> -<tr><td>Class:</td><td>Integers</td></tr> -<tr><td>Description:</td><td><p>This format specifier is used to merge multiple - related diagnostics together into one common one, without requiring the - difference to be specified as an English string argument. Instead of - specifying the string, the diagnostic gets an integer argument and the - format string selects the numbered option. In this case, the "%2" value - must be an integer in the range [0..2]. If it is 0, it prints 'unary', if - it is 1 it prints 'binary' if it is 2, it prints 'unary or binary'. This - allows other language translations to substitute reasonable words (or entire - phrases) based on the semantics of the diagnostic instead of having to do - things textually.</p> - <p>The selected string does undergo formatting.</p></td></tr> - -<tr><td colspan="2"><b>"plural" format</b></td></tr> -<tr><td>Example:</td><td><tt>"you have %1 %plural{1:mouse|:mice}1 connected to - your computer"</tt></td></tr> -<tr><td>Class:</td><td>Integers</td></tr> -<tr><td>Description:</td><td><p>This is a formatter for complex plural forms. - It is designed to handle even the requirements of languages with very - complex plural forms, as many Baltic languages have. The argument consists - of a series of expression/form pairs, separated by ':', where the first form - whose expression evaluates to true is the result of the modifier.</p> - <p>An expression can be empty, in which case it is always true. See the - example at the top. Otherwise, it is a series of one or more numeric - conditions, separated by ','. If any condition matches, the expression - matches. 
Each numeric condition can take one of three forms.</p> - <ul> - <li>number: A simple decimal number matches if the argument is the same - as the number. Example: <tt>"%plural{1:mouse|:mice}4"</tt></li> - <li>range: A range in square brackets matches if the argument is within - the range. Then range is inclusive on both ends. Example: - <tt>"%plural{0:none|1:one|[2,5]:some|:many}2"</tt></li> - <li>modulo: A modulo operator is followed by a number, and - equals sign and either a number or a range. The tests are the - same as for plain - numbers and ranges, but the argument is taken modulo the number first. - Example: <tt>"%plural{%100=0:even hundred|%100=[1,50]:lower half|:everything - else}1"</tt></li> - </ul> - <p>The parser is very unforgiving. A syntax error, even whitespace, will - abort, as will a failure to match the argument against any - expression.</p></td></tr> - -<tr><td colspan="2"><b>"ordinal" format</b></td></tr> -<tr><td>Example:</td><td><tt>"ambiguity in %ordinal0 argument"</tt></td></tr> -<tr><td>Class:</td><td>Integers</td></tr> -<tr><td>Description:</td><td><p>This is a formatter which represents the - argument number as an ordinal: the value <tt>1</tt> becomes <tt>1st</tt>, - <tt>3</tt> becomes <tt>3rd</tt>, and so on. Values less than <tt>1</tt> - are not supported.</p> - <p>This formatter is currently hard-coded to use English ordinals.</p></td></tr> - -<tr><td colspan="2"><b>"objcclass" format</b></td></tr> -<tr><td>Example:</td><td><tt>"method %objcclass0 not found"</tt></td></tr> -<tr><td>Class:</td><td>DeclarationName</td></tr> -<tr><td>Description:</td><td><p>This is a simple formatter that indicates the - DeclarationName corresponds to an Objective-C class method selector. 
As - such, it prints the selector with a leading '+'.</p></td></tr> - -<tr><td colspan="2"><b>"objcinstance" format</b></td></tr> -<tr><td>Example:</td><td><tt>"method %objcinstance0 not found"</tt></td></tr> -<tr><td>Class:</td><td>DeclarationName</td></tr> -<tr><td>Description:</td><td><p>This is a simple formatter that indicates the - DeclarationName corresponds to an Objective-C instance method selector. As - such, it prints the selector with a leading '-'.</p></td></tr> - -<tr><td colspan="2"><b>"q" format</b></td></tr> -<tr><td>Example:</td><td><tt>"candidate found by name lookup is %q0"</tt></td></tr> -<tr><td>Class:</td><td>NamedDecl*</td></tr> -<tr><td>Description</td><td><p>This formatter indicates that the fully-qualified name of the declaration should be printed, e.g., "std::vector" rather than "vector".</p></td></tr> - -<tr><td colspan="2"><b>"diff" format</b></td></tr> -<tr><td>Example:</td><td><tt>"no known conversion %diff{from | to | }1,2"</tt></td></tr> -<tr><td>Class:</td><td>QualType</td></tr> -<tr><td>Description</td><td><p>This formatter takes two QualTypes and attempts to print a template difference between the two. If tree printing is off, the text inside the braces before the pipe is printed, with the formatted text replacing the $. If tree printing is on, the text after the pipe is printed and a type tree is printed after the diagnostic message. -</p></td></tr> - -</table> - -<p>It is really easy to add format specifiers to the Clang diagnostics system, -but they should be discussed before they are added. 
If you are creating a lot -of repetitive diagnostics and/or have an idea for a useful formatter, please -bring it up on the cfe-dev mailing list.</p> - -<!-- ===================================================== --> -<h4 id="producingdiag">Producing the Diagnostic</h4> -<!-- ===================================================== --> - -<p>Now that you've created the diagnostic in the DiagnosticKinds.td file, you -need to write the code that detects the condition in question and emits the -new diagnostic. Various components of Clang (e.g. the preprocessor, Sema, -etc) provide a helper function named "Diag". It creates a diagnostic and -accepts the arguments, ranges, and other information that goes along with -it.</p> - -<p>For example, the binary expression error comes from code like this:</p> - -<pre> - if (various things that are bad) - Diag(Loc, diag::err_typecheck_invalid_operands) - << lex->getType() << rex->getType() - << lex->getSourceRange() << rex->getSourceRange(); -</pre> - -<p>This shows the use of the Diag method: it takes a location (a <a -href="#SourceLocation">SourceLocation</a> object) and a diagnostic enum value -(which matches the name from DiagnosticKinds.td). If the diagnostic takes -arguments, they are specified with the << operator: the first argument -becomes %0, the second becomes %1, etc. The diagnostic interface allows you to -specify arguments of many different types, including <tt>int</tt> and -<tt>unsigned</tt> for integer arguments, <tt>const char*</tt> and -<tt>std::string</tt> for string arguments, <tt>DeclarationName</tt> and -<tt>const IdentifierInfo*</tt> for names, <tt>QualType</tt> for types, etc. -SourceRanges are also specified with the << operator, but do not have a -specific ordering requirement.</p> - -<p>As you can see, adding and producing a diagnostic is pretty straightforward.
-The hard part is deciding exactly what you need to say to help the user, picking -a suitable wording, and providing the information needed to format it correctly. -The good news is that the call site that issues a diagnostic should be -completely independent of how the diagnostic is formatted and in what language -it is rendered. -</p> - -<!-- ==================================================== --> -<h4 id="fix-it-hints">Fix-It Hints</h4> -<!-- ==================================================== --> - -<p>In some cases, the front end emits diagnostics when it is clear -that some small change to the source code would fix the problem. For -example, a missing semicolon at the end of a statement or a use of -deprecated syntax that is easily rewritten into a more modern form. -Clang tries very hard to emit the diagnostic and recover gracefully -in these and other cases.</p> - -<p>However, for these cases where the fix is obvious, the diagnostic -can be annotated with a hint (referred to as a "fix-it hint") that -describes how to change the code referenced by the diagnostic to fix -the problem. For example, it might add the missing semicolon at the -end of the statement or rewrite the use of a deprecated construct -into something more palatable. Here is one such example from the C++ -front end, where we warn about the right-shift operator changing -meaning from C++98 to C++11:</p> - -<pre> -test.cpp:3:7: warning: use of right-shift operator ('>>') in template argument will require parentheses in C++11 -A<100 >> 2> *a; - ^ - ( ) -</pre> - -<p>Here, the fix-it hint is suggesting that parentheses be added, -and showing exactly where those parentheses would be inserted into the -source code. The fix-it hints themselves describe what changes to make -to the source code in an abstract manner, which the text diagnostic -printer renders as a line of "insertions" below the caret line. 
<a -href="#DiagnosticClient">Other diagnostic clients</a> might choose -to render the code differently (e.g., as markup inline) or even give -the user the ability to automatically fix the problem.</p> - -<p>Fix-it hints on errors and warnings need to obey these rules:</p> - -<ul> -<li>Since they are automatically applied if <code>-Xclang -fixit</code> -is passed to the driver, they should only be used when it's very likely they -match the user's intent.</li> -<li>Clang must recover from errors as if the fix-it had been applied.</li> -</ul> - -<p>If a fix-it can't obey these rules, put the fix-it on a note. Fix-its on -notes are not applied automatically.</p> - -<p>All fix-it hints are described by the <code>FixItHint</code> class, -instances of which should be attached to the diagnostic using the -<< operator in the same way that highlighted source ranges and -arguments are passed to the diagnostic. Fix-it hints can be created -with one of three constructors:</p> - -<dl> - <dt><code>FixItHint::CreateInsertion(Loc, Code)</code></dt> - <dd>Specifies that the given <code>Code</code> (a string) should be inserted - before the source location <code>Loc</code>.</dd> - - <dt><code>FixItHint::CreateRemoval(Range)</code></dt> - <dd>Specifies that the code in the given source <code>Range</code> - should be removed.</dd> - - <dt><code>FixItHint::CreateReplacement(Range, Code)</code></dt> - <dd>Specifies that the code in the given source <code>Range</code> - should be removed, and replaced with the given <code>Code</code> string.</dd> -</dl> - -<!-- ============================================================= --> -<h4><a name="DiagnosticClient">The DiagnosticClient Interface</a></h4> -<!-- ============================================================= --> - -<p>Once code generates a diagnostic with all of the arguments and the rest of -the relevant information, Clang needs to know what to do with it. 
As previously -mentioned, the diagnostic machinery goes through some filtering to map a -severity onto a diagnostic level, then (assuming the diagnostic is not mapped to -"<tt>Ignore</tt>") it invokes an object that implements the DiagnosticClient -interface with the information.</p> - -<p>It is possible to implement this interface in many different ways. For -example, the normal Clang DiagnosticClient (named 'TextDiagnosticPrinter') turns -the arguments into strings (according to the various formatting rules), prints -out the file/line/column information and the string, then prints out the line of -code, the source ranges, and the caret. However, this behavior isn't required. -</p> - -<p>Another implementation of the DiagnosticClient interface is the -'TextDiagnosticBuffer' class, which is used when Clang is in -verify mode. -Instead of formatting and printing out the diagnostics, this implementation just -captures and remembers the diagnostics as they fly by. Then -verify compares -the list of produced diagnostics to the list of expected ones. If they disagree, -it prints out its own output. Full documentation for the -verify mode can be -found in the Clang API documentation for VerifyDiagnosticConsumer, <a -href="/doxygen/classclang_1_1VerifyDiagnosticConsumer.html#details">here</a>. -</p> - -<p>There are many other possible implementations of this interface, and this is -why we prefer diagnostics to pass down rich structured information in arguments. -For example, an HTML output might want declaration names be linkified to where -they come from in the source. Another example is that a GUI might let you click -on typedefs to expand them. This application would want to pass significantly -more information about types through to the GUI than a simple flat string. 
The -interface allows this to happen.</p> - -<!-- ====================================================== --> -<h4><a name="translation">Adding Translations to Clang</a></h4> -<!-- ====================================================== --> - -<p>Not possible yet! Diagnostic strings should be written in UTF-8, the client -can translate to the relevant code page if needed. Each translation completely -replaces the format string for the diagnostic.</p> - - -<!-- ======================================================================= --> -<h3 id="SourceLocation">The SourceLocation and SourceManager classes</h3> -<!-- ======================================================================= --> - -<p>Strangely enough, the SourceLocation class represents a location within the -source code of the program. Important design points include:</p> - -<ol> -<li>sizeof(SourceLocation) must be extremely small, as these are embedded into - many AST nodes and are passed around often. Currently it is 32 bits.</li> -<li>SourceLocation must be a simple value object that can be efficiently - copied.</li> -<li>We should be able to represent a source location for any byte of any input - file. This includes in the middle of tokens, in whitespace, in trigraphs, - etc.</li> -<li>A SourceLocation must encode the current #include stack that was active when - the location was processed. For example, if the location corresponds to a - token, it should contain the set of #includes active when the token was - lexed. This allows us to print the #include stack for a diagnostic.</li> -<li>SourceLocation must be able to describe macro expansions, capturing both - the ultimate instantiation point and the source of the original character - data.</li> -</ol> - -<p>In practice, the SourceLocation works together with the SourceManager class -to encode two pieces of information about a location: its spelling location -and its instantiation location. For most tokens, these will be the same. 
-However, for a macro expansion (or tokens that came from a _Pragma directive) -these will describe the location of the characters corresponding to the token -and the location where the token was used (i.e. the macro instantiation point -or the location of the _Pragma itself).</p> - -<p>The Clang front-end inherently depends on the location of a token being -tracked correctly. If it is ever incorrect, the front-end may get confused and -die. The reason for this is that the notion of the 'spelling' of a Token in -Clang depends on being able to find the original input characters for the token. -This concept maps directly to the "spelling location" for the token.</p> - - -<!-- ======================================================================= --> -<h3 id="SourceRange">SourceRange and CharSourceRange</h3> -<!-- ======================================================================= --> -<!-- mostly taken from - http://lists.cs.uiuc.edu/pipermail/cfe-dev/2010-August/010595.html --> - -<p>Clang represents most source ranges by [first, last], where first and last -each point to the beginning of their respective tokens. For example -consider the SourceRange of the following statement:</p> -<pre> -x = foo + bar; -^first ^last -</pre> - -<p>To map from this representation to a character-based -representation, the 'last' location needs to be adjusted to point to -(or past) the end of that token with either -<code>Lexer::MeasureTokenLength()</code> or -<code>Lexer::getLocForEndOfToken()</code>. 
For the rare cases -where character-level source range information is needed we use -the <code>CharSourceRange</code> class.</p> - - -<!-- ======================================================================= --> -<h2 id="libdriver">The Driver Library</h2> -<!-- ======================================================================= --> - -<p>The clang Driver and library are documented <a -href="DriverInternals.html">here</a>.</p> - -<!-- ======================================================================= --> -<h2 id="pch">Precompiled Headers</h2> -<!-- ======================================================================= --> - -<p>Clang supports two implementations of precompiled headers. The - default implementation, precompiled headers (<a - href="PCHInternals.html">PCH</a>) uses a serialized representation - of Clang's internal data structures, encoded with the <a - href="http://llvm.org/docs/BitCodeFormat.html">LLVM bitstream - format</a>. Pretokenized headers (<a - href="PTHInternals.html">PTH</a>), on the other hand, contain a - serialized representation of the tokens encountered when - preprocessing a header (and anything that header includes).</p> - - -<!-- ======================================================================= --> -<h2 id="libfrontend">The Frontend Library</h2> -<!-- ======================================================================= --> - -<p>The Frontend library contains functionality useful for building -tools on top of the clang libraries, for example several methods for -outputting diagnostics.</p> - -<!-- ======================================================================= --> -<h2 id="liblex">The Lexer and Preprocessor Library</h2> -<!-- ======================================================================= --> - -<p>The Lexer library contains several tightly-connected classes that are involved -with the nasty process of lexing and preprocessing C source code.
The main -interface to this library for outside clients is the large <a -href="#Preprocessor">Preprocessor</a> class. -It contains the various pieces of state that are required to coherently read -tokens out of a translation unit.</p> - -<p>The core interface to the Preprocessor object (once it is set up) is the -Preprocessor::Lex method, which returns the next <a href="#Token">Token</a> from -the preprocessor stream. There are two types of token providers that the -preprocessor is capable of reading from: a buffer lexer (provided by the <a -href="#Lexer">Lexer</a> class) and a buffered token stream (provided by the <a -href="#TokenLexer">TokenLexer</a> class).</p> - - -<!-- ======================================================================= --> -<h3 id="Token">The Token class</h3> -<!-- ======================================================================= --> - -<p>The Token class is used to represent a single lexed token. Tokens are -intended to be used by the lexer/preprocessor and parser libraries, but are not -intended to live beyond them (for example, they should not live in the ASTs).</p> - -<p>Tokens most often live on the stack (or some other location that is efficient -to access) as the parser is running, but occasionally do get buffered up. For -example, macro definitions are stored as a series of tokens, and the C++ -front-end periodically needs to buffer tokens up for tentative parsing and -various pieces of look-ahead. As such, the size of a Token matters. On a 32-bit -system, sizeof(Token) is currently 16 bytes.</p> - -<p>Tokens occur in two forms: "<a href="#AnnotationToken">Annotation -Tokens</a>" and normal tokens. Normal tokens are those returned by the lexer, -annotation tokens represent semantic information and are produced by the parser, -replacing normal tokens in the token stream.
Normal tokens contain the -following information:</p> - -<ul> -<li><b>A SourceLocation</b> - This indicates the location of the start of the -token.</li> - -<li><b>A length</b> - This stores the length of the token as stored in the -SourceBuffer. For tokens that include them, this length includes trigraphs and -escaped newlines which are ignored by later phases of the compiler. By pointing -into the original source buffer, it is always possible to get the original -spelling of a token completely accurately.</li> - -<li><b>IdentifierInfo</b> - If a token takes the form of an identifier, and if -identifier lookup was enabled when the token was lexed (e.g. the lexer was not -reading in 'raw' mode) this contains a pointer to the unique hash value for the -identifier. Because the lookup happens before keyword identification, this -field is set even for language keywords like 'for'.</li> - -<li><b>TokenKind</b> - This indicates the kind of token as classified by the -lexer. This includes things like <tt>tok::starequal</tt> (for the "*=" -operator), <tt>tok::ampamp</tt> for the "&&" token, and keyword values -(e.g. <tt>tok::kw_for</tt>) for identifiers that correspond to keywords. Note -that some tokens can be spelled multiple ways. For example, C++ supports -"operator keywords", where things like "and" are treated exactly like the -"&&" operator. In these cases, the kind value is set to -<tt>tok::ampamp</tt>, which is good for the parser, which doesn't have to -consider both forms. For something that cares about which form is used (e.g. 
-the preprocessor 'stringize' operator) the spelling indicates the original -form.</li> - -<li><b>Flags</b> - There are currently four flags tracked by the -lexer/preprocessor system on a per-token basis: - - <ol> - <li><b>StartOfLine</b> - This was the first token that occurred on its input - source line.</li> - <li><b>LeadingSpace</b> - There was a space character either immediately - before the token or transitively before the token as it was expanded - through a macro. The definition of this flag is very closely defined by - the stringizing requirements of the preprocessor.</li> - <li><b>DisableExpand</b> - This flag is used internally to the preprocessor to - represent identifier tokens which have macro expansion disabled. This - prevents them from being considered as candidates for macro expansion ever - in the future.</li> - <li><b>NeedsCleaning</b> - This flag is set if the original spelling for the - token includes a trigraph or escaped newline. Since this is uncommon, - many pieces of code can fast-path on tokens that did not need cleaning. - </ol> -</li> -</ul> - -<p>One interesting (and somewhat unusual) aspect of normal tokens is that they -don't contain any semantic information about the lexed value. For example, if -the token was a pp-number token, we do not represent the value of the number -that was lexed (this is left for later pieces of code to decide). Additionally, -the lexer library has no notion of typedef names vs variable names: both are -returned as identifiers, and the parser is left to decide whether a specific -identifier is a typedef or a variable (tracking this requires scope information -among other things). 
The parser can do this translation by replacing tokens -returned by the preprocessor with "Annotation Tokens".</p> - -<!-- ======================================================================= --> -<h3 id="AnnotationToken">Annotation Tokens</h3> -<!-- ======================================================================= --> - -<p>Annotation Tokens are tokens that are synthesized by the parser and injected -into the preprocessor's token stream (replacing existing tokens) to record -semantic information found by the parser. For example, if "foo" is found to be -a typedef, the "foo" <tt>tok::identifier</tt> token is replaced with an -<tt>tok::annot_typename</tt>. This is useful for a couple of reasons: 1) this -makes it easy to handle qualified type names (e.g. "foo::bar::baz<42>::t") -in C++ as a single "token" in the parser. 2) if the parser backtracks, the -reparse does not need to redo semantic analysis to determine whether a token -sequence is a variable, type, template, etc.</p> - -<p>Annotation Tokens are created by the parser and reinjected into the parser's -token stream (when backtracking is enabled). Because they can only exist in -tokens that the preprocessor-proper is done with, it doesn't need to keep around -flags like "start of line" that the preprocessor uses to do its job. -Additionally, an annotation token may "cover" a sequence of preprocessor tokens -(e.g. <tt>a::b::c</tt> is five preprocessor tokens). As such, the valid fields -of an annotation token are different than the fields for a normal token (but -they are multiplexed into the normal Token fields):</p> - -<ul> -<li><b>SourceLocation "Location"</b> - The SourceLocation for the annotation -token indicates the first token replaced by the annotation token. In the example -above, it would be the location of the "a" identifier.</li> - -<li><b>SourceLocation "AnnotationEndLoc"</b> - This holds the location of the -last token replaced with the annotation token. 
In the example above, it would -be the location of the "c" identifier.</li> - -<li><b>void* "AnnotationValue"</b> - This contains an opaque object -that the parser gets from Sema. The parser merely preserves the -information for Sema to later interpret based on the annotation token -kind.</li> - -<li><b>TokenKind "Kind"</b> - This indicates the kind of Annotation token this -is. See below for the different valid kinds.</li> -</ul> - -<p>Annotation tokens currently come in three kinds:</p> - -<ol> -<li><b>tok::annot_typename</b>: This annotation token represents a -resolved typename token that is potentially qualified. The -AnnotationValue field contains the <tt>QualType</tt> returned by -Sema::getTypeName(), possibly with source location information -attached.</li> - -<li><b>tok::annot_cxxscope</b>: This annotation token represents a C++ -scope specifier, such as "A::B::". This corresponds to the grammar -productions "::" and ":: [opt] nested-name-specifier". The -AnnotationValue pointer is a <tt>NestedNameSpecifier*</tt> returned by -the Sema::ActOnCXXGlobalScopeSpecifier and -Sema::ActOnCXXNestedNameSpecifier callbacks.</li> - -<li><b>tok::annot_template_id</b>: This annotation token represents a -C++ template-id such as "foo<int, 4>", where "foo" is the name -of a template. The AnnotationValue pointer is a pointer to a malloc'd -TemplateIdAnnotation object. Depending on the context, a parsed -template-id that names a type might become a typename annotation token -(if all we care about is the named type, e.g., because it occurs in a -type specifier) or might remain a template-id token (if we want to -retain more source location information or produce a new type, e.g., -in a declaration of a class template specialization). 
template-id -annotation tokens that refer to a type can be "upgraded" to typename -annotation tokens by the parser.</li> - -</ol> - -<p>As mentioned above, annotation tokens are not returned by the preprocessor, -they are formed on demand by the parser. This means that the parser has to be -aware of cases where an annotation could occur and form it where appropriate. -This is somewhat similar to how the parser handles Translation Phase 6 of C99: -String Concatenation (see C99 5.1.1.2). In the case of string concatenation, -the preprocessor just returns distinct tok::string_literal and -tok::wide_string_literal tokens and the parser eats a sequence of them wherever -the grammar indicates that a string literal can occur.</p> - -<p>In order to do this, whenever the parser expects a tok::identifier or -tok::coloncolon, it should call the TryAnnotateTypeOrScopeToken or -TryAnnotateCXXScopeToken methods to form the annotation token. These methods -will maximally form the specified annotation tokens and replace the current -token with them, if applicable. If the current token is not valid for an -annotation token, it will remain an identifier or :: token.</p> - - - -<!-- ======================================================================= --> -<h3 id="Lexer">The Lexer class</h3> -<!-- ======================================================================= --> - -<p>The Lexer class provides the mechanics of lexing tokens out of a source -buffer and deciding what they mean. The Lexer is complicated by the fact that -it operates on raw buffers that have not had spelling eliminated (this is a -necessity to get decent performance), but this is countered with careful coding -as well as standard performance techniques (for example, the comment handling -code is vectorized on X86 and PowerPC hosts).</p> - -<p>The lexer has a couple of interesting modal features:</p> - -<ul> -<li>The lexer can operate in 'raw' mode.
This mode has several features that - make it possible to quickly lex the file (e.g. it stops identifier lookup, - doesn't specially handle preprocessor tokens, handles EOF differently, etc). - This mode is used for lexing within an "<tt>#if 0</tt>" block, for - example.</li> -<li>The lexer can capture and return comments as tokens. This is required to - support the -C preprocessor mode, which passes comments through, and is - used by the diagnostic checker to identify expect-error annotations.</li> -<li>The lexer can be in ParsingFilename mode, which happens when preprocessing - after reading a #include directive. This mode changes the parsing of '<' - to return an "angled string" instead of a bunch of tokens for each thing - within the filename.</li> -<li>When parsing a preprocessor directive (after "<tt>#</tt>") the - ParsingPreprocessorDirective mode is entered. This changes the parser to - return EOD at a newline.</li> -<li>The Lexer uses a LangOptions object to know whether trigraphs are enabled, - whether C++ or ObjC keywords are recognized, etc.</li> -</ul> - -<p>In addition to these modes, the lexer keeps track of a couple of other - features that are local to a lexed buffer, which change as the buffer is - lexed:</p> - -<ul> -<li>The Lexer uses BufferPtr to keep track of the current character being - lexed.</li> -<li>The Lexer uses IsAtStartOfLine to keep track of whether the next lexed token - will start with its "start of line" bit set.</li> -<li>The Lexer keeps track of the current #if directives that are active (which - can be nested).</li> -<li>The Lexer keeps track of an <a href="#MultipleIncludeOpt"> - MultipleIncludeOpt</a> object, which is used to - detect whether the buffer uses the standard "<tt>#ifndef XX</tt> / - <tt>#define XX</tt>" idiom to prevent multiple inclusion.
If a buffer does, - subsequent includes can be ignored if the XX macro is defined.</li> -</ul> - -<!-- ======================================================================= --> -<h3 id="TokenLexer">The TokenLexer class</h3> -<!-- ======================================================================= --> - -<p>The TokenLexer class is a token provider that returns tokens from a list -of tokens that came from somewhere else. It is typically used for two things: 1) -returning tokens from a macro definition as it is being expanded 2) returning -tokens from an arbitrary buffer of tokens. The latter use is used by _Pragma and -will most likely be used to handle unbounded look-ahead for the C++ parser.</p> - -<!-- ======================================================================= --> -<h3 id="MultipleIncludeOpt">The MultipleIncludeOpt class</h3> -<!-- ======================================================================= --> - -<p>The MultipleIncludeOpt class implements a really simple little state machine -that is used to detect the standard "<tt>#ifndef XX</tt> / <tt>#define XX</tt>" -idiom that people typically use to prevent multiple inclusion of headers. If a -buffer uses this idiom and is subsequently #include'd, the preprocessor can -simply check to see whether the guarding condition is defined or not.
If so, -the preprocessor can completely ignore the include of the header.</p> - - - -<!-- ======================================================================= --> -<h2 id="libparse">The Parser Library</h2> -<!-- ======================================================================= --> - -<!-- ======================================================================= --> -<h2 id="libast">The AST Library</h2> -<!-- ======================================================================= --> - -<!-- ======================================================================= --> -<h3 id="Type">The Type class and its subclasses</h3> -<!-- ======================================================================= --> - -<p>The Type class (and its subclasses) are an important part of the AST. Types -are accessed through the ASTContext class, which implicitly creates and uniques -them as they are needed. Types have a couple of non-obvious features: 1) they -do not capture type qualifiers like const or volatile (See -<a href="#QualType">QualType</a>), and 2) they implicitly capture typedef -information. Once created, types are immutable (unlike decls).</p> - -<p>Typedefs in C make semantic analysis a bit more complex than it would -be without them. The issue is that we want to capture typedef information -and represent it in the AST perfectly, but the semantics of operations need to -"see through" typedefs. For example, consider this code:</p> - -<code> -void func() {<br> - typedef int foo;<br> - foo X, *Y;<br> - typedef foo* bar;<br> - bar Z;<br> - *X; <i>// error</i><br> - **Y; <i>// error</i><br> - **Z; <i>// error</i><br> -}<br> -</code> - -<p>The code above is illegal, and thus we expect there to be diagnostics emitted -on the annotated lines. 
In this example, we expect to get:</p> - -<pre> -<b>test.c:6:1: error: indirection requires pointer operand ('foo' invalid)</b> -*X; // error -<span style="color:blue">^~</span> -<b>test.c:7:1: error: indirection requires pointer operand ('foo' invalid)</b> -**Y; // error -<span style="color:blue">^~~</span> -<b>test.c:8:1: error: indirection requires pointer operand ('foo' invalid)</b> -**Z; // error -<span style="color:blue">^~~</span> -</pre> - -<p>While this example is somewhat silly, it illustrates the point: we want to -retain typedef information where possible, so that we can emit errors about -"<tt>std::string</tt>" instead of "<tt>std::basic_string<char, std:...</tt>". -Doing this requires properly keeping typedef information (for example, the type -of "X" is "foo", not "int"), and requires properly propagating it through the -various operators (for example, the type of *Y is "foo", not "int"). In order -to retain this information, the type of these expressions is an instance of the -TypedefType class, which indicates that the type of these expressions is a -typedef for foo. -</p> - -<p>Representing types like this is great for diagnostics, because the -user-specified type is always immediately available. There are two problems -with this: first, various semantic checks need to make judgements about the -<em>actual structure</em> of a type, ignoring typedefs. Second, we need an -efficient way to query whether two types are structurally identical to each -other, ignoring typedefs. The solution to both of these problems is the idea of -canonical types.</p> - -<!-- =============== --> -<h4>Canonical Types</h4> -<!-- =============== --> - -<p>Every instance of the Type class contains a canonical type pointer. For -simple types with no typedefs involved (e.g. "<tt>int</tt>", "<tt>int*</tt>", -"<tt>int**</tt>"), the type just points to itself. For types that have a -typedef somewhere in their structure (e.g. 
"<tt>foo</tt>", "<tt>foo*</tt>", -"<tt>foo**</tt>", "<tt>bar</tt>"), the canonical type pointer points to their -structurally equivalent type without any typedefs (e.g. "<tt>int</tt>", -"<tt>int*</tt>", "<tt>int**</tt>", and "<tt>int*</tt>" respectively).</p> - -<p>This design provides a constant time operation (dereferencing the canonical -type pointer) that gives us access to the structure of types. For example, -we can trivially tell that "bar" and "foo*" are the same type by dereferencing -their canonical type pointers and doing a pointer comparison (they both point -to the single "<tt>int*</tt>" type).</p> - -<p>Canonical types and typedef types bring up some complexities that must be -carefully managed. Specifically, the "isa/cast/dyncast" operators generally -shouldn't be used in code that is inspecting the AST. For example, when type -checking the indirection operator (unary '*' on a pointer), the type checker -must verify that the operand has a pointer type. It would not be correct to -check that with "<tt>isa<PointerType>(SubExpr->getType())</tt>", -because this predicate would fail if the subexpression had a typedef type.</p> - -<p>The solution to this problem are a set of helper methods on Type, used to -check their properties. In this case, it would be correct to use -"<tt>SubExpr->getType()->isPointerType()</tt>" to do the check. This -predicate will return true if the <em>canonical type is a pointer</em>, which is -true any time the type is structurally a pointer type. The only hard part here -is remembering not to use the <tt>isa/cast/dyncast</tt> operations.</p> - -<p>The second problem we face is how to get access to the pointer type once we -know it exists. To continue the example, the result type of the indirection -operator is the pointee type of the subexpression. In order to determine the -type, we need to get the instance of PointerType that best captures the typedef -information in the program. 
If the type of the expression is literally a -PointerType, we can return that, otherwise we have to dig through the -typedefs to find the pointer type. For example, if the subexpression had type -"<tt>foo*</tt>", we could return that type as the result. If the subexpression -had type "<tt>bar</tt>", we want to return "<tt>foo*</tt>" (note that we do -<em>not</em> want "<tt>int*</tt>"). In order to provide all of this, Type has -a getAsPointerType() method that checks whether the type is structurally a -PointerType and, if so, returns the best one. If not, it returns a null -pointer.</p> - -<p>This structure is somewhat mystical, but after meditating on it, it will -make sense to you :).</p> - -<!-- ======================================================================= --> -<h3 id="QualType">The QualType class</h3> -<!-- ======================================================================= --> - -<p>The QualType class is designed as a trivial value class that is -small, passed by-value and is efficient to query. The idea of -QualType is that it stores the type qualifiers (const, volatile, -restrict, plus some extended qualifiers required by language -extensions) separately from the types themselves. QualType is -conceptually a pair of "Type*" and the bits for these type qualifiers.</p> - -<p>By storing the type qualifiers as bits in the conceptual pair, it is -extremely efficient to get the set of qualifiers on a QualType (just return the -field of the pair), add a type qualifier (which is a trivial constant-time -operation that sets a bit), and remove one or more type qualifiers (just return -a QualType with the bitfield set to empty).</p> - -<p>Further, because the bits are stored outside of the type itself, we do not -need to create duplicates of types with different sets of qualifiers (i.e. there -is only a single heap allocated "int" type: "const int" and "volatile const int" -both point to the same heap allocated "int" type). 
This reduces the heap size -used to represent bits and also means we do not have to consider qualifiers when -uniquing types (<a href="#Type">Type</a> does not even contain qualifiers).</p> - -<p>In practice, the two most common type qualifiers (const and -restrict) are stored in the low bits of the pointer to the Type -object, together with a flag indicating whether extended qualifiers -are present (which must be heap-allocated). This means that QualType -is exactly the same size as a pointer.</p> - -<!-- ======================================================================= --> -<h3 id="DeclarationName">Declaration names</h3> -<!-- ======================================================================= --> - -<p>The <tt>DeclarationName</tt> class represents the name of a - declaration in Clang. Declarations in the C family of languages can - take several different forms. Most declarations are named by - simple identifiers, e.g., "<code>f</code>" and "<code>x</code>" in - the function declaration <code>f(int x)</code>. In C++, declaration - names can also name class constructors ("<code>Class</code>" - in <code>struct Class { Class(); }</code>), class destructors - ("<code>~Class</code>"), overloaded operator names ("operator+"), - and conversion functions ("<code>operator void const *</code>"). In - Objective-C, declaration names can refer to the names of Objective-C - methods, which involve the method name and the parameters, - collectively called a <i>selector</i>, e.g., - "<code>setWidth:height:</code>". 
Since all of these kinds of - entities - variables, functions, Objective-C methods, C++ - constructors, destructors, and operators - are represented as - subclasses of Clang's common <code>NamedDecl</code> - class, <code>DeclarationName</code> is designed to efficiently - represent any kind of name.</p> - -<p>Given - a <code>DeclarationName</code> <code>N</code>, <code>N.getNameKind()</code> - will produce a value that describes what kind of name <code>N</code> - stores. There are 8 options (all of the names are inside - the <code>DeclarationName</code> class)</p> -<dl> - <dt>Identifier</dt> - <dd>The name is a simple - identifier. Use <code>N.getAsIdentifierInfo()</code> to retrieve the - corresponding <code>IdentifierInfo*</code> pointing to the actual - identifier. Note that C++ overloaded operators (e.g., - "<code>operator+</code>") are represented as special kinds of - identifiers. Use <code>IdentifierInfo</code>'s <code>getOverloadedOperatorID</code> - function to determine whether an identifier is an overloaded - operator name.</dd> - - <dt>ObjCZeroArgSelector, ObjCOneArgSelector, - ObjCMultiArgSelector</dt> - <dd>The name is an Objective-C selector, which can be retrieved as a - <code>Selector</code> instance - via <code>N.getObjCSelector()</code>. The three possible name - kinds for Objective-C reflect an optimization within - the <code>DeclarationName</code> class: both zero- and - one-argument selectors are stored as a - masked <code>IdentifierInfo</code> pointer, and therefore require - very little space, since zero- and one-argument selectors are far - more common than multi-argument selectors (which use a different - structure).</dd> - - <dt>CXXConstructorName</dt> - <dd>The name is a C++ constructor - name. Use <code>N.getCXXNameType()</code> to retrieve - the <a href="#QualType">type</a> that this constructor is meant to - construct. 
The type is always the canonical type, since all - constructors for a given type have the same name.</dd> - - <dt>CXXDestructorName</dt> - <dd>The name is a C++ destructor - name. Use <code>N.getCXXNameType()</code> to retrieve - the <a href="#QualType">type</a> whose destructor is being - named. This type is always a canonical type.</dd> - - <dt>CXXConversionFunctionName</dt> - <dd>The name is a C++ conversion function. Conversion functions are - named according to the type they convert to, e.g., "<code>operator void - const *</code>". Use <code>N.getCXXNameType()</code> to retrieve - the type that this conversion function converts to. This type is - always a canonical type.</dd> - - <dt>CXXOperatorName</dt> - <dd>The name is a C++ overloaded operator name. Overloaded operators - are named according to their spelling, e.g., - "<code>operator+</code>" or "<code>operator new - []</code>". Use <code>N.getCXXOverloadedOperator()</code> to - retrieve the overloaded operator (a value of - type <code>OverloadedOperatorKind</code>).</dd> -</dl> - -<p><code>DeclarationName</code>s are cheap to create, copy, and - compare. They require only a single pointer's worth of storage in - the common cases (identifiers, zero- - and one-argument Objective-C selectors) and use dense, uniqued - storage for the other kinds of - names. Two <code>DeclarationName</code>s can be compared for - equality (<code>==</code>, <code>!=</code>) using a simple bitwise - comparison, can be ordered - with <code><</code>, <code>></code>, <code><=</code>, - and <code>>=</code> (which provide a lexicographical ordering for - normal identifiers but an unspecified ordering for other kinds of - names), and can be placed into LLVM <code>DenseMap</code>s - and <code>DenseSet</code>s.</p> - -<p><code>DeclarationName</code> instances can be created in different - ways depending on what kind of name the instance will store. 
Normal - identifiers (<code>IdentifierInfo</code> pointers) and Objective-C selectors - (<code>Selector</code>) can be implicitly converted - to <code>DeclarationName</code>s. Names for C++ constructors, - destructors, conversion functions, and overloaded operators can be retrieved from - the <code>DeclarationNameTable</code>, an instance of which is - available as <code>ASTContext::DeclarationNames</code>. The member - functions <code>getCXXConstructorName</code>, <code>getCXXDestructorName</code>, - <code>getCXXConversionFunctionName</code>, and <code>getCXXOperatorName</code>, respectively, - return <code>DeclarationName</code> instances for the four kinds of - C++ special function names.</p> - -<!-- ======================================================================= --> -<h3 id="DeclContext">Declaration contexts</h3> -<!-- ======================================================================= --> -<p>Every declaration in a program exists within some <i>declaration - context</i>, such as a translation unit, namespace, class, or - function. Declaration contexts in Clang are represented by - the <code>DeclContext</code> class, from which the various - declaration-context AST nodes - (<code>TranslationUnitDecl</code>, <code>NamespaceDecl</code>, <code>RecordDecl</code>, <code>FunctionDecl</code>, - etc.) will derive. The <code>DeclContext</code> class provides - several facilities common to each declaration context:</p> -<dl> - <dt>Source-centric vs. Semantics-centric View of Declarations</dt> - <dd><code>DeclContext</code> provides two views of the declarations - stored within a declaration context. The source-centric view - accurately represents the program source code as written, including - multiple declarations of entities where present (see the - section <a href="#Redeclarations">Redeclarations and - Overloads</a>), while the semantics-centric view represents the - program semantics. 
The two views are kept synchronized by semantic - analysis while the ASTs are being constructed.</dd> - - <dt>Storage of declarations within that context</dt> - <dd>Every declaration context can contain some number of - declarations. For example, a C++ class (represented - by <code>RecordDecl</code>) contains various member functions, - fields, nested types, and so on. All of these declarations will be - stored within the <code>DeclContext</code>, and one can iterate - over the declarations via - [<code>DeclContext::decls_begin()</code>, - <code>DeclContext::decls_end()</code>). This mechanism provides - the source-centric view of declarations in the context.</dd> - - <dt>Lookup of declarations within that context</dt> - <dd>The <code>DeclContext</code> structure provides efficient name - lookup for names within that declaration context. For example, - if <code>N</code> is a namespace we can look for the - name <code>N::f</code> - using <code>DeclContext::lookup</code>. The lookup itself is - based on a lazily-constructed array (for declaration contexts - with a small number of declarations) or hash table (for - declaration contexts with more declarations). The lookup - operation provides the semantics-centric view of the declarations - in the context.</dd> - - <dt>Ownership of declarations</dt> - <dd>The <code>DeclContext</code> owns all of the declarations that - were declared within its declaration context, and is responsible - for the management of their memory as well as their - (de-)serialization.</dd> -</dl> - -<p>All declarations are stored within a declaration context, and one - can query - information about the context in which each declaration lives. One - can retrieve the <code>DeclContext</code> that contains a - particular <code>Decl</code> - using <code>Decl::getDeclContext</code>. 
However, see the - section <a href="#LexicalAndSemanticContexts">Lexical and Semantic - Contexts</a> for more information about how to interpret this - context information.</p> - -<h4 id="Redeclarations">Redeclarations and Overloads</h4> -<p>Within a translation unit, it is common for an entity to be -declared several times. For example, we might declare a function "f" - and then later re-declare it as part of an inlined definition:</p> - -<pre> -void f(int x, int y, int z = 1); - -inline void f(int x, int y, int z) { /* ... */ } -</pre> - -<p>The representation of "f" differs in the source-centric and - semantics-centric views of a declaration context. In the - source-centric view, all redeclarations will be present, in the - order they occurred in the source code, making - this view suitable for clients that wish to see the structure of - the source code. In the semantics-centric view, only the most recent "f" - will be found by the lookup, since it effectively replaces the first - declaration of "f".</p> - -<p>In the semantics-centric view, overloading of functions is - represented explicitly. For example, given two declarations of a - function "g" that are overloaded, e.g.,</p> -<pre> -void g(); -void g(int); -</pre> -<p>the <code>DeclContext::lookup</code> operation will return - a <code>DeclContext::lookup_result</code> that contains a range of iterators - over declarations of "g". Clients that perform semantic analysis on a - program that is not concerned with the actual source code will - primarily use this semantics-centric view.</p> - -<h4 id="LexicalAndSemanticContexts">Lexical and Semantic Contexts</h4> -<p>Each declaration has two potentially different - declaration contexts: a <i>lexical</i> context, which corresponds to - the source-centric view of the declaration context, and - a <i>semantic</i> context, which corresponds to the - semantics-centric view. 
The lexical context is accessible - via <code>Decl::getLexicalDeclContext</code> while the - semantic context is accessible - via <code>Decl::getDeclContext</code>, both of which return - <code>DeclContext</code> pointers. For most declarations, the two - contexts are identical. For example:</p> - -<pre> -class X { -public: - void f(int x); -}; -</pre> - -<p>Here, the semantic and lexical contexts of <code>X::f</code> are - the <code>DeclContext</code> associated with the - class <code>X</code> (itself stored as a <code>RecordDecl</code> AST - node). However, we can now define <code>X::f</code> out-of-line:</p> - -<pre> -void X::f(int x = 17) { /* ... */ } -</pre> - -<p>This definition of <code>X::f</code> has different lexical and semantic - contexts. The lexical context corresponds to the declaration - context in which the actual declaration occurred in the source - code, e.g., the translation unit containing <code>X</code>. Thus, - this declaration of <code>X::f</code> can be found by traversing - the declarations provided by - [<code>decls_begin()</code>, <code>decls_end()</code>) in the - translation unit.</p> - -<p>The semantic context of <code>X::f</code> corresponds to the - class <code>X</code>, since this member function is (semantically) a - member of <code>X</code>. Lookup of the name <code>f</code> into - the <code>DeclContext</code> associated with <code>X</code> will - then return the definition of <code>X::f</code> (including - information about the default argument).</p> - -<h4 id="TransparentContexts">Transparent Declaration Contexts</h4> -<p>In C and C++, there are several contexts in which names that are - logically declared inside another declaration will actually "leak" - out into the enclosing scope from the perspective of name - lookup. 
The most obvious instance of this behavior is in - enumeration types, e.g.,</p> -<pre> -enum Color { - Red, - Green, - Blue -}; -</pre> - -<p>Here, <code>Color</code> is an enumeration, which is a declaration - context that contains the - enumerators <code>Red</code>, <code>Green</code>, - and <code>Blue</code>. Thus, traversing the list of declarations - contained in the enumeration <code>Color</code> will - yield <code>Red</code>, <code>Green</code>, - and <code>Blue</code>. However, outside of the scope - of <code>Color</code> one can name the enumerator <code>Red</code> - without qualifying the name, e.g.,</p> - -<pre> -Color c = Red; -</pre> - -<p>There are other entities in C++ that provide similar behavior. For - example, linkage specifications that use curly braces:</p> - -<pre> -extern "C" { - void f(int); - void g(int); -} -// f and g are visible here -</pre> - -<p>For source-level accuracy, we treat the linkage specification and - enumeration type as a - declaration context in which its enclosed declarations ("Red", - "Green", and "Blue"; "f" and "g") - are declared. However, these declarations are visible outside of the - scope of the declaration context.</p> - -<p>These language features (and several others, described below) have - roughly the same set of - requirements: declarations are declared within a particular lexical - context, but the declarations are also found via name lookup in - scopes enclosing the declaration itself. This feature is implemented - via <i>transparent</i> declaration contexts - (see <code>DeclContext::isTransparentContext()</code>), whose - declarations are visible in the nearest enclosing non-transparent - declaration context. 
This means that the lexical context of the - declaration (e.g., an enumerator) will be the - transparent <code>DeclContext</code> itself, as will the semantic - context, but the declaration will be visible in every outer context - up to and including the first non-transparent declaration context (since - transparent declaration contexts can be nested).</p> - -<p>The transparent <code>DeclContexts</code> are:</p> -<ul> - <li>Enumerations (but not C++11 "scoped enumerations"): - <pre> -enum Color { - Red, - Green, - Blue -}; -// Red, Green, and Blue are in scope - </pre></li> - <li>C++ linkage specifications: - <pre> -extern "C" { - void f(int); - void g(int); -} -// f and g are in scope - </pre></li> - <li>Anonymous unions and structs: - <pre> -struct LookupTable { - bool IsVector; - union { - std::vector<Item> *Vector; - std::set<Item> *Set; - }; -}; - -LookupTable LT; -LT.Vector = 0; // Okay: finds Vector inside the unnamed union - </pre> - </li> - <li>C++11 inline namespaces: -<pre> -namespace mylib { - inline namespace debug { - class X; - } -} -mylib::X *xp; // okay: mylib::X refers to mylib::debug::X -</pre> -</li> -</ul> - - -<h4 id="MultiDeclContext">Multiply-Defined Declaration Contexts</h4> -<p>C++ namespaces have the interesting--and, so far, unique--property that -the namespace can be defined multiple times, and the declarations -provided by each namespace definition are effectively merged (from -the semantic point of view). For example, the following two code -snippets are semantically indistinguishable:</p> -<pre> -// Snippet #1: -namespace N { - void f(); -} -namespace N { - void f(int); -} - -// Snippet #2: -namespace N { - void f(); - void f(int); -} -</pre> - -<p>In Clang's representation, the source-centric view of declaration - contexts will actually have two separate <code>NamespaceDecl</code> - nodes in Snippet #1, each of which is a declaration context that - contains a single declaration of "f". 
However, the semantics-centric - view provided by name lookup into the namespace <code>N</code> for - "f" will return a <code>DeclContext::lookup_result</code> that contains - a range of iterators over declarations of "f".</p> - -<p><code>DeclContext</code> manages multiply-defined declaration - contexts internally. The - function <code>DeclContext::getPrimaryContext</code> retrieves the - "primary" context for a given <code>DeclContext</code> instance, - which is the <code>DeclContext</code> responsible for maintaining - the lookup table used for the semantics-centric view. Given the - primary context, one can follow the chain - of <code>DeclContext</code> nodes that define additional - declarations via <code>DeclContext::getNextContext</code>. Note that - these functions are used internally within the lookup and insertion - methods of the <code>DeclContext</code>, so the vast majority of - clients can ignore them.</p> - -<!-- ======================================================================= --> -<h3 id="CFG">The <tt>CFG</tt> class</h3> -<!-- ======================================================================= --> - -<p>The <tt>CFG</tt> class is designed to represent a source-level -control-flow graph for a single statement (<tt>Stmt*</tt>). Typically -instances of <tt>CFG</tt> are constructed for function bodies (usually -an instance of <tt>CompoundStmt</tt>), but can also be instantiated to -represent the control-flow of any class that subclasses <tt>Stmt</tt>, -which includes simple expressions. Control-flow graphs are especially -useful for performing -<a href="http://en.wikipedia.org/wiki/Data_flow_analysis#Sensitivities">flow- -or path-sensitive</a> program analyses on a given function.</p> - -<!-- ============ --> -<h4>Basic Blocks</h4> -<!-- ============ --> - -<p>Concretely, an instance of <tt>CFG</tt> is a collection of basic -blocks. 
Each basic block is an instance of <tt>CFGBlock</tt>, which -simply contains an ordered sequence of <tt>Stmt*</tt> (each referring -to statements in the AST). The ordering of statements within a block -indicates unconditional flow of control from one statement to the -next. <a href="#ConditionalControlFlow">Conditional control-flow</a> -is represented using edges between basic blocks. The statements -within a given <tt>CFGBlock</tt> can be traversed using -the <tt>CFGBlock::*iterator</tt> interface.</p> - -<p> -A <tt>CFG</tt> object owns the instances of <tt>CFGBlock</tt> within -the control-flow graph it represents. Each <tt>CFGBlock</tt> within a -CFG is also uniquely numbered (accessible -via <tt>CFGBlock::getBlockID()</tt>). Currently the number is -based on the ordering the blocks were created, but no assumptions -should be made on how <tt>CFGBlock</tt>s are numbered other than their -numbers are unique and that they are numbered from 0..N-1 (where N is -the number of basic blocks in the CFG).</p> - -<!-- ===================== --> -<h4>Entry and Exit Blocks</h4> -<!-- ===================== --> - -Each instance of <tt>CFG</tt> contains two special blocks: -an <i>entry</i> block (accessible via <tt>CFG::getEntry()</tt>), which -has no incoming edges, and an <i>exit</i> block (accessible -via <tt>CFG::getExit()</tt>), which has no outgoing edges. Neither -block contains any statements, and they serve the role of providing a -clear entrance and exit for a body of code such as a function body. -The presence of these empty blocks greatly simplifies the -implementation of many analyses built on top of CFGs. - -<!-- ===================================================== --> -<h4 id ="ConditionalControlFlow">Conditional Control-Flow</h4> -<!-- ===================================================== --> - -<p>Conditional control-flow (such as those induced by if-statements -and loops) is represented as edges between <tt>CFGBlock</tt>s. 
-Because different C language constructs can induce control-flow, -each <tt>CFGBlock</tt> also records an extra <tt>Stmt*</tt> that -represents the <i>terminator</i> of the block. A terminator is simply -the statement that caused the control-flow, and is used to identify -the nature of the conditional control-flow between blocks. For -example, in the case of an if-statement, the terminator refers to -the <tt>IfStmt</tt> object in the AST that represented the given -branch.</p> - -<p>To illustrate, consider the following code example:</p> - -<code> -int foo(int x) {<br> - x = x + 1;<br> -<br> - if (x > 2) x++;<br> - else {<br> - x += 2;<br> - x *= 2;<br> - }<br> -<br> - return x;<br> -} -</code> - -<p>After invoking the parser+semantic analyzer on this code fragment, -the AST of the body of <tt>foo</tt> is referenced by a -single <tt>Stmt*</tt>. We can then construct an instance -of <tt>CFG</tt> representing the control-flow graph of this function -body by a single call to a static class method:</p> - -<code> - Stmt* FooBody = ...<br> - CFG* FooCFG = <b>CFG::buildCFG</b>(FooBody); -</code> - -<p>It is the responsibility of the caller of <tt>CFG::buildCFG</tt> -to <tt>delete</tt> the returned <tt>CFG*</tt> when the CFG is no -longer needed.</p> - -<p>Along with providing an interface to iterate over -its <tt>CFGBlock</tt>s, the <tt>CFG</tt> class also provides methods -that are useful for debugging and visualizing CFGs. For example, the -method -<tt>CFG::dump()</tt> dumps a pretty-printed version of the CFG to -standard error. This is especially useful when one is using a -debugger such as gdb. 
For example, here is the output -of <tt>FooCFG->dump()</tt>:</p> - -<code> - [ B5 (ENTRY) ]<br> - Predecessors (0):<br> - Successors (1): B4<br> -<br> - [ B4 ]<br> - 1: x = x + 1<br> - 2: (x > 2)<br> - <b>T: if [B4.2]</b><br> - Predecessors (1): B5<br> - Successors (2): B3 B2<br> -<br> - [ B3 ]<br> - 1: x++<br> - Predecessors (1): B4<br> - Successors (1): B1<br> -<br> - [ B2 ]<br> - 1: x += 2<br> - 2: x *= 2<br> - Predecessors (1): B4<br> - Successors (1): B1<br> -<br> - [ B1 ]<br> - 1: return x;<br> - Predecessors (2): B2 B3<br> - Successors (1): B0<br> -<br> - [ B0 (EXIT) ]<br> - Predecessors (1): B1<br> - Successors (0): -</code> - -<p>For each block, the pretty-printed output displays -the number of <i>predecessor</i> blocks (blocks that have outgoing -control-flow to the given block) and <i>successor</i> blocks (blocks -that have incoming control-flow from the given -block). We can also clearly see the special entry and exit blocks at -the beginning and end of the pretty-printed output. For the entry -block (block B5), the number of predecessor blocks is 0, while for the -exit block (block B0) the number of successor blocks is 0.</p> - -<p>The most interesting block here is B4, whose outgoing control-flow -represents the branching caused by the sole if-statement -in <tt>foo</tt>. Of particular interest is the second statement in -the block, <b><tt>(x > 2)</tt></b>, and the terminator, printed -as <b><tt>if [B4.2]</tt></b>. The second statement represents the -evaluation of the condition of the if-statement, which occurs before -the actual branching of control-flow. Within the <tt>CFGBlock</tt> -for B4, the <tt>Stmt*</tt> for the second statement refers to the -actual expression in the AST for <b><tt>(x > 2)</tt></b>. 
Thus -pointers to subclasses of <tt>Expr</tt> can appear in the list of -statements in a block, and not just subclasses of <tt>Stmt</tt> that -refer to proper C statements.</p> - -<p>The terminator of block B4 is a pointer to the <tt>IfStmt</tt> -object in the AST. The pretty-printer outputs <b><tt>if -[B4.2]</tt></b> because the condition expression of the if-statement -has an actual place in the basic block, and thus the terminator is -essentially -<i>referring</i> to the expression that is the second statement of -block B4 (i.e., B4.2). In this manner, conditions for control-flow -(which also includes conditions for loops and switch statements) are -hoisted into the actual basic block.</p> - -<!-- ===================== --> -<!-- <h4>Implicit Control-Flow</h4> --> -<!-- ===================== --> - -<!-- -<p>A key design principle of the <tt>CFG</tt> class was to not require -any transformations to the AST in order to represent control-flow. -Thus the <tt>CFG</tt> does not perform any "lowering" of the -statements in an AST: loops are not transformed into guarded gotos, -short-circuit operations are not converted to a set of if-statements, -and so on.</p> ---> - - -<!-- ======================================================================= --> -<h3 id="Constants">Constant Folding in the Clang AST</h3> -<!-- ======================================================================= --> - -<p>There are several places where constants and constant folding matter a lot to -the Clang front-end. First, in general, we prefer the AST to retain the source -code as close to how the user wrote it as possible. This means that if they -wrote "5+4", we want to keep the addition and two constants in the AST, we don't -want to fold to "9". This means that constant folding in various ways turns -into a tree walk that needs to handle the various cases.</p> - -<p>However, there are places in both C and C++ that require constants to be -folded. 
For example, the C standard defines what an "integer constant -expression" (i-c-e) is with very precise and specific requirements. The -language then requires i-c-e's in a lot of places (for example, the size of a -bitfield, the value for a case statement, etc). For these, we have to be able -to constant fold the constants, to do semantic checks (e.g. verify bitfield size -is non-negative and that case statements aren't duplicated). We aim for Clang -to be very pedantic about this, diagnosing cases when the code does not use an -i-c-e where one is required, but accepting the code unless running with -<tt>-pedantic-errors</tt>.</p> - -<p>Things get a little bit more tricky when it comes to compatibility with -real-world source code. Specifically, GCC has historically accepted a huge -superset of expressions as i-c-e's, and a lot of real world code depends on this -unfortunate accident of history (including, e.g., the glibc system headers). GCC -accepts anything its "fold" optimizer is capable of reducing to an integer -constant, which means that the definition of what it accepts changes as its -optimizer does. One example is that GCC accepts things like "case X-X:" even -when X is a variable, because it can fold this to 0.</p> - -<p>Another issue is how constants interact with the extensions we support, such -as __builtin_constant_p, __builtin_inf, __extension__ and many others. C99 -obviously does not specify the semantics of any of these extensions, and the -definition of i-c-e does not include them. However, these extensions are often -used in real code, and we have to have a way to reason about them.</p> - -<p>Finally, this is not just a problem for semantic analysis. The code -generator and other clients have to be able to fold constants (e.g. to -initialize global variables) and have to handle a superset of what C99 allows. -Further, these clients can benefit from extended information. 
For example, we -know that "foo()||1" always evaluates to true, but we can't replace the -expression with true because it has side effects.</p> - -<!-- ======================= --> -<h4>Implementation Approach</h4> -<!-- ======================= --> - -<p>After trying several different approaches, we've finally converged on a -design (Note, at the time of this writing, not all of this has been implemented, -consider this a design goal!). Our basic approach is to define a single -recursive evaluation method (<tt>Expr::Evaluate</tt>), which is -implemented in <tt>AST/ExprConstant.cpp</tt>. Given an expression with 'scalar' -type (integer, fp, complex, or pointer) this method returns the following -information:</p> - -<ul> -<li>Whether the expression is an integer constant expression, a general - constant that was folded but has no side effects, a general constant that - was folded but that does have side effects, or an uncomputable/unfoldable - value. -</li> -<li>If the expression was computable in any way, this method returns the APValue - for the result of the expression.</li> -<li>If the expression is not evaluatable at all, this method returns - information on one of the problems with the expression. This includes a - SourceLocation for where the problem is, and a diagnostic ID that explains - the problem. The diagnostic should have ERROR type.</li> -<li>If the expression is not an integer constant expression, this method returns - information on one of the problems with the expression. This includes a - SourceLocation for where the problem is, and a diagnostic ID that explains - the problem. The diagnostic should have EXTENSION type.</li> -</ul> - -<p>This information gives various clients the flexibility that they want, and we -will eventually have some helper methods for various extensions. For example, -Sema should have a <tt>Sema::VerifyIntegerConstantExpression</tt> method, which -calls Evaluate on the expression. 
If the expression is not foldable, the error -is emitted, and it would return true. If the expression is not an i-c-e, the -EXTENSION diagnostic is emitted. Finally it would return false to indicate that -the AST is ok.</p> - -<p>Other clients can use the information in other ways, for example, codegen can -just use expressions that are foldable in any way.</p> - -<!-- ========== --> -<h4>Extensions</h4> -<!-- ========== --> - -<p>This section describes how some of the various extensions Clang supports -interact with constant evaluation:</p> - -<ul> -<li><b><tt>__extension__</tt></b>: The expression form of this extension causes - any evaluatable subexpression to be accepted as an integer constant - expression.</li> -<li><b><tt>__builtin_constant_p</tt></b>: This returns true (as an integer - constant expression) if the operand evaluates to either a numeric value - (that is, not a pointer cast to integral type) of integral, enumeration, - floating or complex type, or if it evaluates to the address of the first - character of a string literal (possibly cast to some other type). As a - special case, if <tt>__builtin_constant_p</tt> is the (potentially - parenthesized) condition of a conditional operator expression ("?:"), only - the true side of the conditional operator is considered, and it is evaluated - with full constant folding.</li> -<li><b><tt>__builtin_choose_expr</tt></b>: The condition is required to be an - integer constant expression, but we accept any constant as an "extension of - an extension". 
This only evaluates one operand depending on which way the - condition evaluates.</li> -<li><b><tt>__builtin_classify_type</tt></b>: This always returns an integer - constant expression.</li> -<li><b><tt>__builtin_inf,nan,..</tt></b>: These are treated just like a - floating-point literal.</li> -<li><b><tt>__builtin_abs,copysign,..</tt></b>: These are constant folded as - general constant expressions.</li> -<li><b><tt>__builtin_strlen</tt></b> and <b><tt>strlen</tt></b>: These are - constant folded as integer constant expressions if the argument is a string - literal.</li> -</ul> - - -<!-- ======================================================================= --> -<h2 id="Howtos">How to change Clang</h2> -<!-- ======================================================================= --> - -<!-- ======================================================================= --> -<h3 id="AddingAttributes">How to add an attribute</h3> -<!-- ======================================================================= --> - -<p>To add an attribute, you'll have to add it to the list of attributes, add it -to the parsing phase, and look for it in the AST scan. -<a href="http://llvm.org/viewvc/llvm-project?view=rev&revision=124217">r124217</a> -has a good example of adding a warning attribute.</p> - -<p>(Beware that this hasn't been reviewed/fixed by the people who designed the -attributes system yet.)</p> - -<h4><a -href="http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Attr.td?view=markup">include/clang/Basic/Attr.td</a></h4> - -<p>Each attribute gets a <tt>def</tt> inheriting from <tt>Attr</tt> or one of -its subclasses. <tt>InheritableAttr</tt> means that the attribute also applies -to subsequent declarations of the same name.</p> - -<p><tt>Spellings</tt> lists the strings that can appear in -<tt>__attribute__((here))</tt> or <tt>[[here]]</tt>. All such strings -will be synonymous. 
If you want to allow the <tt>[[]]</tt> C++11 -syntax, you have to define a list of <tt>Namespaces</tt>, which will -let users write <tt>[[namespace:spelling]]</tt>. Using the empty -string for a namespace will allow users to write just the spelling -with no "<tt>:</tt>".</p> - -<p><tt>Subjects</tt> restricts what kinds of AST node to which this attribute -can appertain (roughly, attach).</p> - -<p><tt>Args</tt> names the arguments the attribute takes, in order. If -<tt>Args</tt> is <tt>[StringArgument<"Arg1">, IntArgument<"Arg2">]</tt> -then <tt>__attribute__((myattribute("Hello", 3)))</tt> will be a valid use.</p> - -<h4>Boilerplate</h4> - -<p>Write a new <tt>HandleYourAttr()</tt> function in <a -href="http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDeclAttr.cpp?view=markup">lib/Sema/SemaDeclAttr.cpp</a>, -and add a case to the switch in <tt>ProcessNonInheritableDeclAttr()</tt> or -<tt>ProcessInheritableDeclAttr()</tt> forwarding to it.</p> - -<p>If your attribute causes extra warnings to fire, define a <tt>DiagGroup</tt> -in <a -href="http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticGroups.td?view=markup">include/clang/Basic/DiagnosticGroups.td</a> -named after the attribute's <tt>Spelling</tt> with "_"s replaced by "-"s. If -you're only defining one diagnostic, you can skip <tt>DiagnosticGroups.td</tt> -and use <tt>InGroup<DiagGroup<"your-attribute">></tt> directly in <a -href="http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td?view=markup">DiagnosticSemaKinds.td</a></p> - -<h4>The meat of your attribute</h4> - -<p>Find an appropriate place in Clang to do whatever your attribute needs to do. 
-Check for the attribute's presence using <tt>Decl::getAttr<YourAttr>()</tt>.</p> - -<p>Update the <a href="LanguageExtensions.html">Clang Language Extensions</a> -document to describe your new attribute.</p> - -<!-- ======================================================================= --> -<h3 id="AddingExprStmt">How to add an expression or statement</h3> -<!-- ======================================================================= --> - -<p>Expressions and statements are one of the most fundamental constructs within a -compiler, because they interact with many different parts of the AST, -semantic analysis, and IR generation. Therefore, adding a new -expression or statement kind into Clang requires some care. The following list -details the various places in Clang where an expression or statement needs to be -introduced, along with patterns to follow to ensure that the new -expression or statement works well across all of the C languages. We -focus on expressions, but statements are similar.</p> - -<ol> - <li>Introduce parsing actions into the parser. Recursive-descent - parsing is mostly self-explanatory, but there are a few things that - are worth keeping in mind: - <ul> - <li>Keep as much source location information as possible! You'll - want it later to produce great diagnostics and support Clang's - various features that map between source code and the AST.</li> - <li>Write tests for all of the "bad" parsing cases, to make sure - your recovery is good. If you have matched delimiters (e.g., - parentheses, square brackets, etc.), use - <tt>Parser::BalancedDelimiterTracker</tt> to give nice diagnostics when - things go wrong.</li> - </ul> - </li> - - <li>Introduce semantic analysis actions into <tt>Sema</tt>. Semantic - analysis should always involve two functions: an <tt>ActOnXXX</tt> - function that will be called directly from the parser, and a - <tt>BuildXXX</tt> function that performs the actual semantic - analysis and will (eventually!) 
build the AST node. It's fairly - common for the <tt>ActOnCXX</tt> function to do very little (often - just some minor translation from the parser's representation to - <tt>Sema</tt>'s representation of the same thing), but the separation - is still important: C++ template instantiation, for example, - should always call the <tt>BuildXXX</tt> variant. Several notes on - semantic analysis before we get into construction of the AST: - <ul> - <li>Your expression probably involves some types and some - subexpressions. Make sure to fully check that those types, and the - types of those subexpressions, meet your expectations. Add - implicit conversions where necessary to make sure that all of the - types line up exactly the way you want them. Write extensive tests - to check that you're getting good diagnostics for mistakes and - that you can use various forms of subexpressions with your - expression.</li> - <li>When type-checking a type or subexpression, make sure to first - check whether the type is "dependent" - (<tt>Type::isDependentType()</tt>) or whether a subexpression is - type-dependent (<tt>Expr::isTypeDependent()</tt>). If any of these - return true, then you're inside a template and you can't do much - type-checking now. That's normal, and your AST node (when you get - there) will have to deal with this case. At this point, you can - write tests that use your expression within templates, but don't - try to instantiate the templates.</li> - <li>For each subexpression, be sure to call - <tt>Sema::CheckPlaceholderExpr()</tt> to deal with "weird" - expressions that don't behave well as subexpressions. 
Then, - determine whether you need to perform - lvalue-to-rvalue conversions - (<tt>Sema::DefaultLvalueConversion</tt>e) or - the usual unary conversions - (<tt>Sema::UsualUnaryConversions</tt>), for places where the - subexpression is producing a value you intend to use.</li> - <li>Your <tt>BuildXXX</tt> function will probably just return - <tt>ExprError()</tt> at this point, since you don't have an AST. - That's perfectly fine, and shouldn't impact your testing.</li> - </ul> - </li> - - <li>Introduce an AST node for your new expression. This starts with - declaring the node in <tt>include/Basic/StmtNodes.td</tt> and - creating a new class for your expression in the appropriate - <tt>include/AST/Expr*.h</tt> header. It's best to look at the class - for a similar expression to get ideas, and there are some specific - things to watch for: - <ul> - <li>If you need to allocate memory, use the <tt>ASTContext</tt> - allocator to allocate memory. Never use raw <tt>malloc</tt> or - <tt>new</tt>, and never hold any resources in an AST node, because - the destructor of an AST node is never called.</li> - - <li>Make sure that <tt>getSourceRange()</tt> covers the exact - source range of your expression. This is needed for diagnostics - and for IDE support.</li> - - <li>Make sure that <tt>children()</tt> visits all of the - subexpressions. This is important for a number of features (e.g., IDE - support, C++ variadic templates). If you have sub-types, you'll - also need to visit those sub-types in the - <tt>RecursiveASTVisitor</tt>.</li> - - <li>Add printing support (<tt>StmtPrinter.cpp</tt>) and dumping - support (<tt>StmtDumper.cpp</tt>) for your expression.</li> - - <li>Add profiling support (<tt>StmtProfile.cpp</tt>) for your AST - node, noting the distinguishing (non-source location) - characteristics of an instance of your expression. 
Omitting this - step will lead to hard-to-diagnose failures regarding matching of - template declarations.</li> - </ul> - </li> - - <li>Teach semantic analysis to build your AST node! At this point, - you can wire up your <tt>Sema::BuildXXX</tt> function to actually - create your AST. A few things to check at this point: - <ul> - <li>If your expression can construct a new C++ class or return a - new Objective-C object, be sure to update and then call - <tt>Sema::MaybeBindToTemporary</tt> for your just-created AST node - to be sure that the object gets properly destructed. An easy way - to test this is to return a C++ class with a private destructor: - semantic analysis should flag an error here with the attempt to - call the destructor.</li> - <li>Inspect the generated AST by printing it using <tt>clang -cc1 - -ast-print</tt>, to make sure you're capturing all of the - important information about how the AST was written.</li> - <li>Inspect the generated AST under <tt>clang -cc1 -ast-dump</tt> - to verify that all of the types in the generated AST line up the - way you want them. Remember that clients of the AST should never - have to "think" to understand what's going on. For example, all - implicit conversions should show up explicitly in the AST.</li> - <li>Write tests that use your expression as a subexpression of - other, well-known expressions. Can you call a function using your - expression as an argument? Can you use the ternary operator?</li> - </ul> - </li> - - <li>Teach code generation to create IR to your AST node. This step - is the first (and only) that requires knowledge of LLVM IR. There - are several things to keep in mind: - <ul> - <li>Code generation is separated into scalar/aggregate/complex and - lvalue/rvalue paths, depending on what kind of result your - expression produces. 
On occasion, this requires some careful - factoring of code to avoid duplication.</li> - - <li><tt>CodeGenFunction</tt> contains functions - <tt>ConvertType</tt> and <tt>ConvertTypeForMem</tt> that convert - Clang's types (<tt>clang::Type*</tt> or <tt>clang::QualType</tt>) - to LLVM types. - Use the former for values, and the later for memory locations: - test with the C++ "bool" type to check this. If you find - that you are having to use LLVM bitcasts to make - the subexpressions of your expression have the type that your - expression expects, STOP! Go fix semantic analysis and the AST so - that you don't need these bitcasts.</li> - - <li>The <tt>CodeGenFunction</tt> class has a number of helper - functions to make certain operations easy, such as generating code - to produce an lvalue or an rvalue, or to initialize a memory - location with a given value. Prefer to use these functions rather - than directly writing loads and stores, because these functions - take care of some of the tricky details for you (e.g., for - exceptions).</li> - - <li>If your expression requires some special behavior in the event - of an exception, look at the <tt>push*Cleanup</tt> functions in - <tt>CodeGenFunction</tt> to introduce a cleanup. You shouldn't - have to deal with exception-handling directly.</li> - - <li>Testing is extremely important in IR generation. 
Use <tt>clang - -cc1 -emit-llvm</tt> and <a - href="http://llvm.org/cmds/FileCheck.html">FileCheck</a> to verify - that you're generating the right IR.</li> - </ul> - </li> - - <li>Teach template instantiation how to cope with your AST - node, which requires some fairly simple code: - <ul> - <li>Make sure that your expression's constructor properly - computes the flags for type dependence (i.e., the type your - expression produces can change from one instantiation to the - next), value dependence (i.e., the constant value your expression - produces can change from one instantiation to the next), - instantiation dependence (i.e., a template parameter occurs - anywhere in your expression), and whether your expression contains - a parameter pack (for variadic templates). Often, computing these - flags just means combining the results from the various types and - subexpressions.</li> - - <li>Add <tt>TransformXXX</tt> and <tt>RebuildXXX</tt> functions to - the - <tt>TreeTransform</tt> class template in <tt>Sema</tt>. - <tt>TransformXXX</tt> should (recursively) transform all of the - subexpressions and types - within your expression, using <tt>getDerived().TransformYYY</tt>. - If all of the subexpressions and types transform without error, it - will then call the <tt>RebuildXXX</tt> function, which will in - turn call <tt>getSema().BuildXXX</tt> to perform semantic analysis - and build your expression.</li> - - <li>To test template instantiation, take those tests you wrote to - make sure that you were type checking with type-dependent - expressions and dependent types (from step #2) and instantiate - those templates with various types, some of which type-check and - some that don't, and test the error messages in each case.</li> - </ul> - </li> - - <li>There are some "extras" that make other features work better. 
- It's worth handling these extras to give your expression complete - integration into Clang: - <ul> - <li>Add code completion support for your expression in - <tt>SemaCodeComplete.cpp</tt>.</li> - - <li>If your expression has types in it, or has any "interesting" - features other than subexpressions, extend libclang's - <tt>CursorVisitor</tt> to provide proper visitation for your - expression, enabling various IDE features such as syntax - highlighting, cross-referencing, and so on. The - <tt>c-index-test</tt> helper program can be used to test these - features.</li> - </ul> - </li> -</ol> - -</div> -</body> -</html> diff --git a/docs/InternalsManual.rst b/docs/InternalsManual.rst new file mode 100644 index 0000000..59dd2f9 --- /dev/null +++ b/docs/InternalsManual.rst @@ -0,0 +1,1810 @@ +============================ +"Clang" CFE Internals Manual +============================ + +.. contents:: + :local: + +Introduction +============ + +This document describes some of the more important APIs and internal design +decisions made in the Clang C front-end. The purpose of this document is to +both capture some of this high level information and also describe some of the +design decisions behind it. This is meant for people interested in hacking on +Clang, not for end-users. The description below is categorized by libraries, +and does not describe any of the clients of the libraries. + +LLVM Support Library +==================== + +The LLVM ``libSupport`` library provides many underlying libraries and +`data-structures <http://llvm.org/docs/ProgrammersManual.html>`_, including +command line option processing, various containers and a system abstraction +layer, which is used for file system access. + +The Clang "Basic" Library +========================= + +This library certainly needs a better name. 
The "basic" library contains a +number of low-level utilities for tracking and manipulating source buffers, +locations within the source buffers, diagnostics, tokens, target abstraction, +and information about the subset of the language being compiled for. + +Part of this infrastructure is specific to C (such as the ``TargetInfo`` +class), other parts could be reused for other non-C-based languages +(``SourceLocation``, ``SourceManager``, ``Diagnostics``, ``FileManager``). +When and if there is future demand we can figure out if it makes sense to +introduce a new library, move the general classes somewhere else, or introduce +some other solution. + +We describe the roles of these classes in order of their dependencies. + +The Diagnostics Subsystem +------------------------- + +The Clang Diagnostics subsystem is an important part of how the compiler +communicates with the human. Diagnostics are the warnings and errors produced +when the code is incorrect or dubious. In Clang, each diagnostic produced has +(at the minimum) a unique ID, an English translation associated with it, a +:ref:`SourceLocation <SourceLocation>` to "put the caret", and a severity +(e.g., ``WARNING`` or ``ERROR``). They can also optionally include a number of +arguments to the diagnostic (which fill in "%0"'s in the string) as well as a +number of source ranges that relate to the diagnostic. + +In this section, we'll be giving examples produced by the Clang command line +driver, but diagnostics can be :ref:`rendered in many different ways +<DiagnosticClient>` depending on how the ``DiagnosticClient`` interface is +implemented. A representative example of a diagnostic is: + +..
code-block:: c++ + + t.c:38:15: error: invalid operands to binary expression ('int *' and '_Complex float') + P = (P-42) + Gamma*4; + ~~~~~~ ^ ~~~~~~~ + +In this example, you can see the English translation, the severity (error), you +can see the source location (the caret ("``^``") and file/line/column info), +the source ranges "``~~~~``", arguments to the diagnostic ("``int*``" and +"``_Complex float``"). You'll have to believe me that there is a unique ID +backing the diagnostic :). + +Getting all of this to happen has several steps and involves many moving +pieces, this section describes them and talks about best practices when adding +a new diagnostic. + +The ``Diagnostic*Kinds.td`` files +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Diagnostics are created by adding an entry to one of the +``clang/Basic/Diagnostic*Kinds.td`` files, depending on what library will be +using it. From this file, :program:`tblgen` generates the unique ID of the +diagnostic, the severity of the diagnostic and the English translation + format +string. + +There is little sanity with the naming of the unique ID's right now. Some +start with ``err_``, ``warn_``, ``ext_`` to encode the severity into the name. +Since the enum is referenced in the C++ code that produces the diagnostic, it +is somewhat useful for it to be reasonably short. + +The severity of the diagnostic comes from the set {``NOTE``, ``WARNING``, +``EXTENSION``, ``EXTWARN``, ``ERROR``}. The ``ERROR`` severity is used for +diagnostics indicating the program is never acceptable under any circumstances. +When an error is emitted, the AST for the input code may not be fully built. +The ``EXTENSION`` and ``EXTWARN`` severities are used for extensions to the +language that Clang accepts. This means that Clang fully understands and can +represent them in the AST, but we produce diagnostics to tell the user their +code is non-portable. The difference is that the former are ignored by +default, and the latter warn by default.
The ``WARNING`` severity is used for +constructs that are valid in the currently selected source language but that +are dubious in some way. The ``NOTE`` level is used to staple more information +onto previous diagnostics. + +These *severities* are mapped into a smaller set (the ``Diagnostic::Level`` +enum, {``Ignored``, ``Note``, ``Warning``, ``Error``, ``Fatal``}) of output +*levels* by the diagnostics subsystem based on various configuration options. +Clang internally supports a fully fine grained mapping mechanism that allows +you to map almost any diagnostic to the output level that you want. The only +diagnostics that cannot be mapped are ``NOTE``\ s, which always follow the +severity of the previously emitted diagnostic and ``ERROR``\ s, which can only +be mapped to ``Fatal`` (it is not possible to turn an error into a warning, for +example). + +Diagnostic mappings are used in many ways. For example, if the user specifies +``-pedantic``, ``EXTENSION`` maps to ``Warning``, if they specify +``-pedantic-errors``, it turns into ``Error``. This is used to implement +options like ``-Wunused_macros``, ``-Wundef`` etc. + +Mapping to ``Fatal`` should only be used for diagnostics that are considered so +severe that error recovery won't be able to recover sensibly from them (thus +spewing a ton of bogus errors). One example of this class of error is failure +to ``#include`` a file. + +The Format String +^^^^^^^^^^^^^^^^^ + +The format string for the diagnostic is very simple, but it has some power. It +takes the form of a string in English with markers that indicate where and how +arguments to the diagnostic are inserted and formatted. For example, here are +some simple format strings: + +..
code-block:: c++ + + "binary integer literals are an extension" + "format string contains '\\0' within the string body" + "more '%%' conversions than data arguments" + "invalid operands to binary expression (%0 and %1)" + "overloaded '%0' must be a %select{unary|binary|unary or binary}2 operator" + " (has %1 parameter%s1)" + +These examples show some important points of format strings. You can use any +plain ASCII character in the diagnostic string except "``%``" without a +problem, but these are C strings, so you have to use and be aware of all the C +escape sequences (as in the second example). If you want to produce a "``%``" +in the output, use the "``%%``" escape sequence, like the third diagnostic. +Finally, Clang uses the "``%...[digit]``" sequences to specify where and how +arguments to the diagnostic are formatted. + +Arguments to the diagnostic are numbered according to how they are specified by +the C++ code that :ref:`produces them <internals-producing-diag>`, and are +referenced by ``%0`` .. ``%9``. If you have more than 10 arguments to your +diagnostic, you are doing something wrong :). Unlike ``printf``, there is no +requirement that arguments to the diagnostic end up in the output in the same +order as they are specified, you could have a format string with "``%1 %0``" +that swaps them, for example. The text in between the percent and digit are +formatting instructions. If there are no instructions, the argument is just +turned into a string and substituted in. + +Here are some "best practices" for writing the English format string: + +* Keep the string short. It should ideally fit in the 80 column limit of the + ``DiagnosticKinds.td`` file. This avoids the diagnostic wrapping when + printed, and forces you to think about the important point you are conveying + with the diagnostic. +* Take advantage of location information. 
The user will be able to see the + line and location of the caret, so you don't need to tell them that the + problem is with the 4th argument to the function: just point to it. +* Do not capitalize the diagnostic string, and do not end it with a period. +* If you need to quote something in the diagnostic string, use single quotes. + +Diagnostics should never take random English strings as arguments: you +shouldn't use "``you have a problem with %0``" and pass in things like "``your +argument``" or "``your return value``" as arguments. Doing this prevents +:ref:`translating <internals-diag-translation>` the Clang diagnostics to other +languages (because they'll get random English words in their otherwise +localized diagnostic). The exceptions to this are C/C++ language keywords +(e.g., ``auto``, ``const``, ``mutable``, etc) and C/C++ operators (``/=``). +Note that things like "pointer" and "reference" are not keywords. On the other +hand, you *can* include anything that comes from the user's source code, +including variable names, types, labels, etc. The "``select``" format can be +used to achieve this sort of thing in a localizable way, see below. + +Formatting a Diagnostic Argument +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Arguments to diagnostics are fully typed internally, and come from a couple +different classes: integers, types, names, and random strings. Depending on +the class of the argument, it can be optionally formatted in different ways. +This gives the ``DiagnosticClient`` information about what the argument means +without requiring it to use a specific presentation (consider this MVC for +Clang :). + +Here are the different diagnostic argument formats currently supported by +Clang: + +**"s" format** + +Example: + ``"requires %1 parameter%s1"`` +Class: + Integers +Description: + This is a simple formatter for integers that is useful when producing English + diagnostics. When the integer is 1, it prints as nothing. 
When the integer + is not 1, it prints as "``s``". This allows some simple grammatical forms to + be handled correctly, and eliminates the need to use gross things like + ``"requires %1 parameter(s)"``. + +**"select" format** + +Example: + ``"must be a %select{unary|binary|unary or binary}2 operator"`` +Class: + Integers +Description: + This format specifier is used to merge multiple related diagnostics together + into one common one, without requiring the difference to be specified as an + English string argument. Instead of specifying the string, the diagnostic + gets an integer argument and the format string selects the numbered option. + In this case, the "``%2``" value must be an integer in the range [0..2]. If + it is 0, it prints "unary", if it is 1 it prints "binary" if it is 2, it + prints "unary or binary". This allows other language translations to + substitute reasonable words (or entire phrases) based on the semantics of the + diagnostic instead of having to do things textually. The selected string + does undergo formatting. + +**"plural" format** + +Example: + ``"you have %1 %plural{1:mouse|:mice}1 connected to your computer"`` +Class: + Integers +Description: + This is a formatter for complex plural forms. It is designed to handle even + the requirements of languages with very complex plural forms, as many Baltic + languages have. The argument consists of a series of expression/form pairs, + separated by ":", where the first form whose expression evaluates to true is + the result of the modifier. + + An expression can be empty, in which case it is always true. See the example + at the top. Otherwise, it is a series of one or more numeric conditions, + separated by ",". If any condition matches, the expression matches. Each + numeric condition can take one of three forms. + + * number: A simple decimal number matches if the argument is the same as the + number.
Example: ``"%plural{1:mouse|:mice}4"`` + * range: A range in square brackets matches if the argument is within the + range. The range is inclusive on both ends. Example: + ``"%plural{0:none|1:one|[2,5]:some|:many}2"`` + * modulo: A modulo operator is followed by a number, an equals sign and + either a number or a range. The tests are the same as for plain numbers + and ranges, but the argument is taken modulo the number first. Example: + ``"%plural{%100=0:even hundred|%100=[1,50]:lower half|:everything else}1"`` + + The parser is very unforgiving. A syntax error, even whitespace, will abort, + as will a failure to match the argument against any expression. + +**"ordinal" format** + +Example: + ``"ambiguity in %ordinal0 argument"`` +Class: + Integers +Description: + This is a formatter which represents the argument number as an ordinal: the + value ``1`` becomes ``1st``, ``3`` becomes ``3rd``, and so on. Values less + than ``1`` are not supported. This formatter is currently hard-coded to use + English ordinals. + +**"objcclass" format** + +Example: + ``"method %objcclass0 not found"`` +Class: + ``DeclarationName`` +Description: + This is a simple formatter that indicates the ``DeclarationName`` corresponds + to an Objective-C class method selector. As such, it prints the selector + with a leading "``+``". + +**"objcinstance" format** + +Example: + ``"method %objcinstance0 not found"`` +Class: + ``DeclarationName`` +Description: + This is a simple formatter that indicates the ``DeclarationName`` corresponds + to an Objective-C instance method selector. As such, it prints the selector + with a leading "``-``". + +**"q" format** + +Example: + ``"candidate found by name lookup is %q0"`` +Class: + ``NamedDecl *`` +Description: + This formatter indicates that the fully-qualified name of the declaration + should be printed, e.g., "``std::vector``" rather than "``vector``".
+ +**"diff" format** + +Example: + ``"no known conversion %diff{from $ to $|from argument type to parameter type}1,2"`` +Class: + ``QualType`` +Description: + This formatter takes two ``QualType``\ s and attempts to print a template + difference between the two. If tree printing is off, the text inside the + braces before the pipe is printed, with the formatted text replacing the $. + If tree printing is on, the text after the pipe is printed and a type tree is + printed after the diagnostic message. + +It is really easy to add format specifiers to the Clang diagnostics system, but +they should be discussed before they are added. If you are creating a lot of +repetitive diagnostics and/or have an idea for a useful formatter, please bring +it up on the cfe-dev mailing list. + +.. _internals-producing-diag: + +Producing the Diagnostic +^^^^^^^^^^^^^^^^^^^^^^^^ + +Now that you've created the diagnostic in the ``Diagnostic*Kinds.td`` file, you +need to write the code that detects the condition in question and emits the new +diagnostic. Various components of Clang (e.g., the preprocessor, ``Sema``, +etc.) provide a helper function named "``Diag``". It creates a diagnostic and +accepts the arguments, ranges, and other information that goes along with it. + +For example, the binary expression error comes from code like this: + +.. code-block:: c++ + + if (various things that are bad) + Diag(Loc, diag::err_typecheck_invalid_operands) + << lex->getType() << rex->getType() + << lex->getSourceRange() << rex->getSourceRange(); + +This shows the use of the ``Diag`` method: it takes a location (a +:ref:`SourceLocation <SourceLocation>` object) and a diagnostic enum value +(which matches the name from ``Diagnostic*Kinds.td``). If the diagnostic takes +arguments, they are specified with the ``<<`` operator: the first argument +becomes ``%0``, the second becomes ``%1``, etc.
The diagnostic interface +allows you to specify arguments of many different types, including ``int`` and +``unsigned`` for integer arguments, ``const char*`` and ``std::string`` for +string arguments, ``DeclarationName`` and ``const IdentifierInfo *`` for names, +``QualType`` for types, etc. ``SourceRange``\ s are also specified with the +``<<`` operator, but do not have a specific ordering requirement. + +As you can see, adding and producing a diagnostic is pretty straightforward. +The hard part is deciding exactly what you need to say to help the user, +picking a suitable wording, and providing the information needed to format it +correctly. The good news is that the call site that issues a diagnostic should +be completely independent of how the diagnostic is formatted and in what +language it is rendered. + +Fix-It Hints +^^^^^^^^^^^^ + +In some cases, the front end emits diagnostics when it is clear that some small +change to the source code would fix the problem. For example, a missing +semicolon at the end of a statement or a use of deprecated syntax that is +easily rewritten into a more modern form. Clang tries very hard to emit the +diagnostic and recover gracefully in these and other cases. + +However, for these cases where the fix is obvious, the diagnostic can be +annotated with a hint (referred to as a "fix-it hint") that describes how to +change the code referenced by the diagnostic to fix the problem. For example, +it might add the missing semicolon at the end of the statement or rewrite the +use of a deprecated construct into something more palatable. Here is one such +example from the C++ front end, where we warn about the right-shift operator +changing meaning from C++98 to C++11: + +.. 
code-block:: c++ + + test.cpp:3:7: warning: use of right-shift operator ('>>') in template argument + will require parentheses in C++11 + A<100 >> 2> *a; + ^ + ( ) + +Here, the fix-it hint is suggesting that parentheses be added, and showing +exactly where those parentheses would be inserted into the source code. The +fix-it hints themselves describe what changes to make to the source code in an +abstract manner, which the text diagnostic printer renders as a line of +"insertions" below the caret line. :ref:`Other diagnostic clients +<DiagnosticClient>` might choose to render the code differently (e.g., as +markup inline) or even give the user the ability to automatically fix the +problem. + +Fix-it hints on errors and warnings need to obey these rules: + +* Since they are automatically applied if ``-Xclang -fixit`` is passed to the + driver, they should only be used when it's very likely they match the user's + intent. +* Clang must recover from errors as if the fix-it had been applied. + +If a fix-it can't obey these rules, put the fix-it on a note. Fix-its on notes +are not applied automatically. + +All fix-it hints are described by the ``FixItHint`` class, instances of which +should be attached to the diagnostic using the ``<<`` operator in the same way +that highlighted source ranges and arguments are passed to the diagnostic. +Fix-it hints can be created with one of three constructors: + +* ``FixItHint::CreateInsertion(Loc, Code)`` + + Specifies that the given ``Code`` (a string) should be inserted before the + source location ``Loc``. + +* ``FixItHint::CreateRemoval(Range)`` + + Specifies that the code in the given source ``Range`` should be removed. + +* ``FixItHint::CreateReplacement(Range, Code)`` + + Specifies that the code in the given source ``Range`` should be removed, + and replaced with the given ``Code`` string. + +.. 
_DiagnosticClient: + +The ``DiagnosticClient`` Interface +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Once code generates a diagnostic with all of the arguments and the rest of the +relevant information, Clang needs to know what to do with it. As previously +mentioned, the diagnostic machinery goes through some filtering to map a +severity onto a diagnostic level, then (assuming the diagnostic is not mapped +to "``Ignore``") it invokes an object that implements the ``DiagnosticClient`` +interface with the information. + +It is possible to implement this interface in many different ways. For +example, the normal Clang ``DiagnosticClient`` (named +``TextDiagnosticPrinter``) turns the arguments into strings (according to the +various formatting rules), prints out the file/line/column information and the +string, then prints out the line of code, the source ranges, and the caret. +However, this behavior isn't required. + +Another implementation of the ``DiagnosticClient`` interface is the +``TextDiagnosticBuffer`` class, which is used when Clang is in ``-verify`` +mode. Instead of formatting and printing out the diagnostics, this +implementation just captures and remembers the diagnostics as they fly by. +Then ``-verify`` compares the list of produced diagnostics to the list of +expected ones. If they disagree, it prints out its own output. Full +documentation for the ``-verify`` mode can be found in the Clang API +documentation for `VerifyDiagnosticConsumer +</doxygen/classclang_1_1VerifyDiagnosticConsumer.html#details>`_. + +There are many other possible implementations of this interface, and this is +why we prefer diagnostics to pass down rich structured information in +arguments. For example, an HTML output might want declaration names to be +linkified to where they come from in the source. Another example is that a GUI +might let you click on typedefs to expand them.
This application would want to +pass significantly more information about types through to the GUI than a +simple flat string. The interface allows this to happen. + +.. _internals-diag-translation: + +Adding Translations to Clang +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Not possible yet! Diagnostic strings should be written in UTF-8, the client can +translate to the relevant code page if needed. Each translation completely +replaces the format string for the diagnostic. + +.. _SourceLocation: +.. _SourceManager: + +The ``SourceLocation`` and ``SourceManager`` classes +---------------------------------------------------- + +Strangely enough, the ``SourceLocation`` class represents a location within the +source code of the program. Important design points include: + +#. ``sizeof(SourceLocation)`` must be extremely small, as these are embedded + into many AST nodes and are passed around often. Currently it is 32 bits. +#. ``SourceLocation`` must be a simple value object that can be efficiently + copied. +#. We should be able to represent a source location for any byte of any input + file. This includes in the middle of tokens, in whitespace, in trigraphs, + etc. +#. A ``SourceLocation`` must encode the current ``#include`` stack that was + active when the location was processed. For example, if the location + corresponds to a token, it should contain the set of ``#include``\ s active + when the token was lexed. This allows us to print the ``#include`` stack + for a diagnostic. +#. ``SourceLocation`` must be able to describe macro expansions, capturing both + the ultimate instantiation point and the source of the original character + data. + +In practice, the ``SourceLocation`` works together with the ``SourceManager`` +class to encode two pieces of information about a location: its spelling +location and its instantiation location. For most tokens, these will be the +same. 
However, for a macro expansion (or tokens that came from a ``_Pragma`` +directive) these will describe the location of the characters corresponding to +the token and the location where the token was used (i.e., the macro +instantiation point or the location of the ``_Pragma`` itself). + +The Clang front-end inherently depends on the location of a token being tracked +correctly. If it is ever incorrect, the front-end may get confused and die. +The reason for this is that the notion of the "spelling" of a ``Token`` in +Clang depends on being able to find the original input characters for the +token. This concept maps directly to the "spelling location" for the token. + +``SourceRange`` and ``CharSourceRange`` +--------------------------------------- + +.. mostly taken from http://lists.cs.uiuc.edu/pipermail/cfe-dev/2010-August/010595.html + +Clang represents most source ranges by [first, last], where "first" and "last" +each point to the beginning of their respective tokens. For example consider +the ``SourceRange`` of the following statement: + +.. code-block:: c++ + + x = foo + bar; + ^first ^last + +To map from this representation to a character-based representation, the "last" +location needs to be adjusted to point to (or past) the end of that token with +either ``Lexer::MeasureTokenLength()`` or ``Lexer::getLocForEndOfToken()``. For +the rare cases where character-level source ranges information is needed we use +the ``CharSourceRange`` class. + +The Driver Library +================== + +The clang Driver and library are documented :doc:`here <DriverInternals>`. + +Precompiled Headers +=================== + +Clang supports two implementations of precompiled headers. The default +implementation, precompiled headers (:doc:`PCH <PCHInternals>`) uses a +serialized representation of Clang's internal data structures, encoded with the +`LLVM bitstream format <http://llvm.org/docs/BitCodeFormat.html>`_. 
+Pretokenized headers (:doc:`PTH <PTHInternals>`), on the other hand, contain a +serialized representation of the tokens encountered when preprocessing a header +(and anything that header includes). + +The Frontend Library +==================== + +The Frontend library contains functionality useful for building tools on top of +the Clang libraries, for example several methods for outputting diagnostics. + +The Lexer and Preprocessor Library +================================== + +The Lexer library contains several tightly-connected classes that are involved +with the nasty process of lexing and preprocessing C source code. The main +interface to this library for outside clients is the large ``Preprocessor`` +class. It contains the various pieces of state that are required to coherently +read tokens out of a translation unit. + +The core interface to the ``Preprocessor`` object (once it is set up) is the +``Preprocessor::Lex`` method, which returns the next :ref:`Token <Token>` from +the preprocessor stream. There are two types of token providers that the +preprocessor is capable of reading from: a buffer lexer (provided by the +:ref:`Lexer <Lexer>` class) and a buffered token stream (provided by the +:ref:`TokenLexer <TokenLexer>` class). + +.. _Token: + +The Token class +--------------- + +The ``Token`` class is used to represent a single lexed token. Tokens are +intended to be used by the lexer/preprocessor and parser libraries, but are not +intended to live beyond them (for example, they should not live in the ASTs). + +Tokens most often live on the stack (or some other location that is efficient +to access) as the parser is running, but occasionally do get buffered up. For +example, macro definitions are stored as a series of tokens, and the C++ +front-end periodically needs to buffer tokens up for tentative parsing and +various pieces of look-ahead. As such, the size of a ``Token`` matters. On a +32-bit system, ``sizeof(Token)`` is currently 16 bytes.
+ +Tokens occur in two forms: :ref:`annotation tokens <AnnotationToken>` and +normal tokens. Normal tokens are those returned by the lexer, annotation +tokens represent semantic information and are produced by the parser, replacing +normal tokens in the token stream. Normal tokens contain the following +information: + +* **A SourceLocation** --- This indicates the location of the start of the + token. + +* **A length** --- This stores the length of the token as stored in the + ``SourceBuffer``. For tokens that include them, this length includes + trigraphs and escaped newlines which are ignored by later phases of the + compiler. By pointing into the original source buffer, it is always possible + to get the original spelling of a token completely accurately. + +* **IdentifierInfo** --- If a token takes the form of an identifier, and if + identifier lookup was enabled when the token was lexed (e.g., the lexer was + not reading in "raw" mode) this contains a pointer to the unique hash value + for the identifier. Because the lookup happens before keyword + identification, this field is set even for language keywords like "``for``". + +* **TokenKind** --- This indicates the kind of token as classified by the + lexer. This includes things like ``tok::starequal`` (for the "``*=``" + operator), ``tok::ampamp`` for the "``&&``" token, and keyword values (e.g., + ``tok::kw_for``) for identifiers that correspond to keywords. Note that + some tokens can be spelled multiple ways. For example, C++ supports + "operator keywords", where things like "``and``" are treated exactly like the + "``&&``" operator. In these cases, the kind value is set to ``tok::ampamp``, + which is good for the parser, which doesn't have to consider both forms. For + something that cares about which form is used (e.g., the preprocessor + "stringize" operator) the spelling indicates the original form. 
+ +* **Flags** --- There are currently four flags tracked by the + lexer/preprocessor system on a per-token basis: + + #. **StartOfLine** --- This was the first token that occurred on its input + source line. + #. **LeadingSpace** --- There was a space character either immediately before + the token or transitively before the token as it was expanded through a + macro. The definition of this flag is very closely defined by the + stringizing requirements of the preprocessor. + #. **DisableExpand** --- This flag is used internally to the preprocessor to + represent identifier tokens which have macro expansion disabled. This + prevents them from being considered as candidates for macro expansion ever + in the future. + #. **NeedsCleaning** --- This flag is set if the original spelling for the + token includes a trigraph or escaped newline. Since this is uncommon, + many pieces of code can fast-path on tokens that did not need cleaning. + +One interesting (and somewhat unusual) aspect of normal tokens is that they +don't contain any semantic information about the lexed value. For example, if +the token was a pp-number token, we do not represent the value of the number +that was lexed (this is left for later pieces of code to decide). +Additionally, the lexer library has no notion of typedef names vs variable +names: both are returned as identifiers, and the parser is left to decide +whether a specific identifier is a typedef or a variable (tracking this +requires scope information among other things). The parser can do this +translation by replacing tokens returned by the preprocessor with "Annotation +Tokens". + +.. _AnnotationToken: + +Annotation Tokens +----------------- + +Annotation tokens are tokens that are synthesized by the parser and injected +into the preprocessor's token stream (replacing existing tokens) to record +semantic information found by the parser. 
For example, if "``foo``" is found +to be a typedef, the "``foo``" ``tok::identifier`` token is replaced with an +``tok::annot_typename``. This is useful for a couple of reasons: 1) this makes +it easy to handle qualified type names (e.g., "``foo::bar::baz<42>::t``") in +C++ as a single "token" in the parser. 2) if the parser backtracks, the +reparse does not need to redo semantic analysis to determine whether a token +sequence is a variable, type, template, etc. + +Annotation tokens are created by the parser and reinjected into the parser's +token stream (when backtracking is enabled). Because they can only exist in +tokens that the preprocessor-proper is done with, it doesn't need to keep +around flags like "start of line" that the preprocessor uses to do its job. +Additionally, an annotation token may "cover" a sequence of preprocessor tokens +(e.g., "``a::b::c``" is five preprocessor tokens). As such, the valid fields +of an annotation token are different than the fields for a normal token (but +they are multiplexed into the normal ``Token`` fields): + +* **SourceLocation "Location"** --- The ``SourceLocation`` for the annotation + token indicates the first token replaced by the annotation token. In the + example above, it would be the location of the "``a``" identifier. +* **SourceLocation "AnnotationEndLoc"** --- This holds the location of the last + token replaced with the annotation token. In the example above, it would be + the location of the "``c``" identifier. +* **void* "AnnotationValue"** --- This contains an opaque object that the + parser gets from ``Sema``. The parser merely preserves the information for + ``Sema`` to later interpret based on the annotation token kind. +* **TokenKind "Kind"** --- This indicates the kind of Annotation token this is. + See below for the different valid kinds. + +Annotation tokens currently come in three kinds: + +#. 
**tok::annot_typename**: This annotation token represents a resolved + typename token that is potentially qualified. The ``AnnotationValue`` field + contains the ``QualType`` returned by ``Sema::getTypeName()``, possibly with + source location information attached. +#. **tok::annot_cxxscope**: This annotation token represents a C++ scope + specifier, such as "``A::B::``". This corresponds to the grammar + productions "*::*" and "*:: [opt] nested-name-specifier*". The + ``AnnotationValue`` pointer is a ``NestedNameSpecifier *`` returned by the + ``Sema::ActOnCXXGlobalScopeSpecifier`` and + ``Sema::ActOnCXXNestedNameSpecifier`` callbacks. +#. **tok::annot_template_id**: This annotation token represents a C++ + template-id such as "``foo<int, 4>``", where "``foo``" is the name of a + template. The ``AnnotationValue`` pointer is a pointer to a ``malloc``'d + ``TemplateIdAnnotation`` object. Depending on the context, a parsed + template-id that names a type might become a typename annotation token (if + all we care about is the named type, e.g., because it occurs in a type + specifier) or might remain a template-id token (if we want to retain more + source location information or produce a new type, e.g., in a declaration of + a class template specialization). template-id annotation tokens that refer + to a type can be "upgraded" to typename annotation tokens by the parser. + +As mentioned above, annotation tokens are not returned by the preprocessor, +they are formed on demand by the parser. This means that the parser has to be +aware of cases where an annotation could occur and form it where appropriate. +This is somewhat similar to how the parser handles Translation Phase 6 of C99: +String Concatenation (see C99 5.1.1.2). In the case of string concatenation, +the preprocessor just returns distinct ``tok::string_literal`` and +``tok::wide_string_literal`` tokens and the parser eats a sequence of them +wherever the grammar indicates that a string literal can occur. 
+ +In order to do this, whenever the parser expects a ``tok::identifier`` or +``tok::coloncolon``, it should call the ``TryAnnotateTypeOrScopeToken`` or +``TryAnnotateCXXScopeToken`` methods to form the annotation token. These +methods will maximally form the specified annotation tokens and replace the +current token with them, if applicable. If the current token is not valid for +an annotation token, it will remain an identifier or "``::``" token. + +.. _Lexer: + +The ``Lexer`` class +------------------- + +The ``Lexer`` class provides the mechanics of lexing tokens out of a source +buffer and deciding what they mean. The ``Lexer`` is complicated by the fact +that it operates on raw buffers that have not had spelling eliminated (this is +a necessity to get decent performance), but this is countered with careful +coding as well as standard performance techniques (for example, the comment +handling code is vectorized on X86 and PowerPC hosts). + +The lexer has a couple of interesting modal features: + +* The lexer can operate in "raw" mode. This mode has several features that + make it possible to quickly lex the file (e.g., it stops identifier lookup, + doesn't specially handle preprocessor tokens, handles EOF differently, etc). + This mode is used for lexing within an "``#if 0``" block, for example. +* The lexer can capture and return comments as tokens. This is required to + support the ``-C`` preprocessor mode, which passes comments through, and is + used by the diagnostic checker to identify expect-error annotations. +* The lexer can be in ``ParsingFilename`` mode, which happens when + preprocessing after reading a ``#include`` directive. This mode changes the + parsing of "``<``" to return an "angled string" instead of a bunch of tokens + for each thing within the filename. +* When parsing a preprocessor directive (after "``#``") the + ``ParsingPreprocessorDirective`` mode is entered. This changes the parser to + return EOD at a newline.
+* The ``Lexer`` uses a ``LangOptions`` object to know whether trigraphs are + enabled, whether C++ or ObjC keywords are recognized, etc. + +In addition to these modes, the lexer keeps track of a couple of other features +that are local to a lexed buffer, which change as the buffer is lexed: + +* The ``Lexer`` uses ``BufferPtr`` to keep track of the current character being + lexed. +* The ``Lexer`` uses ``IsAtStartOfLine`` to keep track of whether the next + lexed token will start with its "start of line" bit set. +* The ``Lexer`` keeps track of the current "``#if``" directives that are active + (which can be nested). +* The ``Lexer`` keeps track of a :ref:`MultipleIncludeOpt + <MultipleIncludeOpt>` object, which is used to detect whether the buffer uses + the standard "``#ifndef XX`` / ``#define XX``" idiom to prevent multiple + inclusion. If a buffer does, subsequent includes can be ignored if the + "``XX``" macro is defined. + +.. _TokenLexer: + +The ``TokenLexer`` class +------------------------ + +The ``TokenLexer`` class is a token provider that returns tokens from a list of +tokens that came from somewhere else. It is typically used for two things: 1) +returning tokens from a macro definition as it is being expanded 2) returning +tokens from an arbitrary buffer of tokens. The latter use is used by +``_Pragma`` and will most likely be used to handle unbounded look-ahead for the +C++ parser. + +.. _MultipleIncludeOpt: + +The ``MultipleIncludeOpt`` class +-------------------------------- + +The ``MultipleIncludeOpt`` class implements a really simple little state +machine that is used to detect the standard "``#ifndef XX`` / ``#define XX``" +idiom that people typically use to prevent multiple inclusion of headers. If a +buffer uses this idiom and is subsequently ``#include``'d, the preprocessor can +simply check to see whether the guarding condition is defined or not. If so, +the preprocessor can completely ignore the include of the header.
+ +The Parser Library +================== + +The AST Library +=============== + +.. _Type: + +The ``Type`` class and its subclasses +------------------------------------- + +The ``Type`` class (and its subclasses) are an important part of the AST. +Types are accessed through the ``ASTContext`` class, which implicitly creates +and uniques them as they are needed. Types have a couple of non-obvious +features: 1) they do not capture type qualifiers like ``const`` or ``volatile`` +(see :ref:`QualType <QualType>`), and 2) they implicitly capture typedef +information. Once created, types are immutable (unlike decls). + +Typedefs in C make semantic analysis a bit more complex than it would be without +them. The issue is that we want to capture typedef information and represent it +in the AST perfectly, but the semantics of operations need to "see through" +typedefs. For example, consider this code: + +.. code-block:: c++ + + void func() { + typedef int foo; + foo X, *Y; + typedef foo *bar; + bar Z; + *X; // error + **Y; // error + **Z; // error + } + +The code above is illegal, and thus we expect there to be diagnostics emitted +on the annotated lines. In this example, we expect to get: + +.. code-block:: c++ + + test.c:6:1: error: indirection requires pointer operand ('foo' invalid) + *X; // error + ^~ + test.c:7:1: error: indirection requires pointer operand ('foo' invalid) + **Y; // error + ^~~ + test.c:8:1: error: indirection requires pointer operand ('foo' invalid) + **Z; // error + ^~~ + +While this example is somewhat silly, it illustrates the point: we want to +retain typedef information where possible, so that we can emit errors about +"``std::string``" instead of "``std::basic_string<char, std:...``". Doing this +requires properly keeping typedef information (for example, the type of ``X`` +is "``foo``", not "``int``"), and requires properly propagating it through the +various operators (for example, the type of ``*Y`` is "``foo``", not +"``int``"). 
In order to retain this information, the type of these expressions +is an instance of the ``TypedefType`` class, which indicates that the type of +these expressions is a typedef for "``foo``". + +Representing types like this is great for diagnostics, because the +user-specified type is always immediately available. There are two problems +with this: first, various semantic checks need to make judgements about the +*actual structure* of a type, ignoring typedefs. Second, we need an efficient +way to query whether two types are structurally identical to each other, +ignoring typedefs. The solution to both of these problems is the idea of +canonical types. + +Canonical Types +^^^^^^^^^^^^^^^ + +Every instance of the ``Type`` class contains a canonical type pointer. For +simple types with no typedefs involved (e.g., "``int``", "``int*``", +"``int**``"), the type just points to itself. For types that have a typedef +somewhere in their structure (e.g., "``foo``", "``foo*``", "``foo**``", +"``bar``"), the canonical type pointer points to their structurally equivalent +type without any typedefs (e.g., "``int``", "``int*``", "``int**``", and +"``int*``" respectively). + +This design provides a constant time operation (dereferencing the canonical type +pointer) that gives us access to the structure of types. For example, we can +trivially tell that "``bar``" and "``foo*``" are the same type by dereferencing +their canonical type pointers and doing a pointer comparison (they both point +to the single "``int*``" type). + +Canonical types and typedef types bring up some complexities that must be +carefully managed. Specifically, the ``isa``/``cast``/``dyn_cast`` operators +generally shouldn't be used in code that is inspecting the AST. For example, +when type checking the indirection operator (unary "``*``" on a pointer), the +type checker must verify that the operand has a pointer type. 
It would not be +correct to check that with "``isa<PointerType>(SubExpr->getType())``", because +this predicate would fail if the subexpression had a typedef type. + +The solution to this problem is a set of helper methods on ``Type``, used to +check their properties. In this case, it would be correct to use +"``SubExpr->getType()->isPointerType()``" to do the check. This predicate will +return true if the *canonical type is a pointer*, which is true any time the +type is structurally a pointer type. The only hard part here is remembering +not to use the ``isa``/``cast``/``dyn_cast`` operations. + +The second problem we face is how to get access to the pointer type once we +know it exists. To continue the example, the result type of the indirection +operator is the pointee type of the subexpression. In order to determine the +type, we need to get the instance of ``PointerType`` that best captures the +typedef information in the program. If the type of the expression is literally +a ``PointerType``, we can return that, otherwise we have to dig through the +typedefs to find the pointer type. For example, if the subexpression had type +"``foo*``", we could return that type as the result. If the subexpression had +type "``bar``", we want to return "``foo*``" (note that we do *not* want +"``int*``"). In order to provide all of this, ``Type`` has a +``getAsPointerType()`` method that checks whether the type is structurally a +``PointerType`` and, if so, returns the best one. If not, it returns a null +pointer. + +This structure is somewhat mystical, but after meditating on it, it will make +sense to you :). + +.. _QualType: + +The ``QualType`` class +---------------------- + +The ``QualType`` class is designed as a trivial value class that is small, +passed by-value and is efficient to query.
The idea of ``QualType`` is that it +stores the type qualifiers (``const``, ``volatile``, ``restrict``, plus some +extended qualifiers required by language extensions) separately from the types +themselves. ``QualType`` is conceptually a pair of "``Type*``" and the bits +for these type qualifiers. + +By storing the type qualifiers as bits in the conceptual pair, it is extremely +efficient to get the set of qualifiers on a ``QualType`` (just return the field +of the pair), add a type qualifier (which is a trivial constant-time operation +that sets a bit), and remove one or more type qualifiers (just return a +``QualType`` with the bitfield set to empty). + +Further, because the bits are stored outside of the type itself, we do not need +to create duplicates of types with different sets of qualifiers (i.e. there is +only a single heap allocated "``int``" type: "``const int``" and "``volatile +const int``" both point to the same heap allocated "``int``" type). This +reduces the heap size used to represent bits and also means we do not have to +consider qualifiers when uniquing types (:ref:`Type <Type>` does not even +contain qualifiers). + +In practice, the two most common type qualifiers (``const`` and ``restrict``) +are stored in the low bits of the pointer to the ``Type`` object, together with +a flag indicating whether extended qualifiers are present (which must be +heap-allocated). This means that ``QualType`` is exactly the same size as a +pointer. + +.. _DeclarationName: + +Declaration names +----------------- + +The ``DeclarationName`` class represents the name of a declaration in Clang. +Declarations in the C family of languages can take several different forms. +Most declarations are named by simple identifiers, e.g., "``f``" and "``x``" in +the function declaration ``f(int x)``. 
In C++, declaration names can also name +class constructors ("``Class``" in ``struct Class { Class(); }``), class +destructors ("``~Class``"), overloaded operator names ("``operator+``"), and +conversion functions ("``operator void const *``"). In Objective-C, +declaration names can refer to the names of Objective-C methods, which involve +the method name and the parameters, collectively called a *selector*, e.g., +"``setWidth:height:``". Since all of these kinds of entities --- variables, +functions, Objective-C methods, C++ constructors, destructors, and operators +--- are represented as subclasses of Clang's common ``NamedDecl`` class, +``DeclarationName`` is designed to efficiently represent any kind of name. + +Given a ``DeclarationName`` ``N``, ``N.getNameKind()`` will produce a value +that describes what kind of name ``N`` stores. There are 8 options (all of the +names are inside the ``DeclarationName`` class). + +``Identifier`` + + The name is a simple identifier. Use ``N.getAsIdentifierInfo()`` to retrieve + the corresponding ``IdentifierInfo*`` pointing to the actual identifier. + Note that C++ overloaded operators (e.g., "``operator+``") are represented as + special kinds of identifiers. Use ``IdentifierInfo``'s + ``getOverloadedOperatorID`` function to determine whether an identifier is an + overloaded operator name. + +``ObjCZeroArgSelector``, ``ObjCOneArgSelector``, ``ObjCMultiArgSelector`` + + The name is an Objective-C selector, which can be retrieved as a ``Selector`` + instance via ``N.getObjCSelector()``. The three possible name kinds for + Objective-C reflect an optimization within the ``DeclarationName`` class: + both zero- and one-argument selectors are stored as a masked + ``IdentifierInfo`` pointer, and therefore require very little space, since + zero- and one-argument selectors are far more common than multi-argument + selectors (which use a different structure). + +``CXXConstructorName`` + + The name is a C++ constructor name. 
Use ``N.getCXXNameType()`` to retrieve + the :ref:`type <QualType>` that this constructor is meant to construct. The + type is always the canonical type, since all constructors for a given type + have the same name. + +``CXXDestructorName`` + + The name is a C++ destructor name. Use ``N.getCXXNameType()`` to retrieve + the :ref:`type <QualType>` whose destructor is being named. This type is + always a canonical type. + +``CXXConversionFunctionName`` + + The name is a C++ conversion function. Conversion functions are named + according to the type they convert to, e.g., "``operator void const *``". + Use ``N.getCXXNameType()`` to retrieve the type that this conversion function + converts to. This type is always a canonical type. + +``CXXOperatorName`` + + The name is a C++ overloaded operator name. Overloaded operators are named + according to their spelling, e.g., "``operator+``" or "``operator new []``". + Use ``N.getCXXOverloadedOperator()`` to retrieve the overloaded operator (a + value of type ``OverloadedOperatorKind``). + +``DeclarationName``\ s are cheap to create, copy, and compare. They require +only a single pointer's worth of storage in the common cases (identifiers, +zero- and one-argument Objective-C selectors) and use dense, uniqued storage +for the other kinds of names. Two ``DeclarationName``\ s can be compared for +equality (``==``, ``!=``) using a simple bitwise comparison, can be ordered +with ``<``, ``>``, ``<=``, and ``>=`` (which provide a lexicographical ordering +for normal identifiers but an unspecified ordering for other kinds of names), +and can be placed into LLVM ``DenseMap``\ s and ``DenseSet``\ s. + +``DeclarationName`` instances can be created in different ways depending on +what kind of name the instance will store. Normal identifiers +(``IdentifierInfo`` pointers) and Objective-C selectors (``Selector``) can be +implicitly converted to ``DeclarationNames``. 
Names for C++ constructors, +destructors, conversion functions, and overloaded operators can be retrieved +from the ``DeclarationNameTable``, an instance of which is available as +``ASTContext::DeclarationNames``. The member functions +``getCXXConstructorName``, ``getCXXDestructorName``, +``getCXXConversionFunctionName``, and ``getCXXOperatorName``, respectively, +return ``DeclarationName`` instances for the four kinds of C++ special function +names. + +.. _DeclContext: + +Declaration contexts +-------------------- + +Every declaration in a program exists within some *declaration context*, such +as a translation unit, namespace, class, or function. Declaration contexts in +Clang are represented by the ``DeclContext`` class, from which the various +declaration-context AST nodes (``TranslationUnitDecl``, ``NamespaceDecl``, +``RecordDecl``, ``FunctionDecl``, etc.) will derive. The ``DeclContext`` class +provides several facilities common to each declaration context: + +Source-centric vs. Semantics-centric View of Declarations + + ``DeclContext`` provides two views of the declarations stored within a + declaration context. The source-centric view accurately represents the + program source code as written, including multiple declarations of entities + where present (see the section :ref:`Redeclarations and Overloads + <Redeclarations>`), while the semantics-centric view represents the program + semantics. The two views are kept synchronized by semantic analysis while + the ASTs are being constructed. + +Storage of declarations within that context + + Every declaration context can contain some number of declarations. For + example, a C++ class (represented by ``RecordDecl``) contains various member + functions, fields, nested types, and so on. All of these declarations will + be stored within the ``DeclContext``, and one can iterate over the + declarations via [``DeclContext::decls_begin()``, + ``DeclContext::decls_end()``). 
This mechanism provides the source-centric + view of declarations in the context. + +Lookup of declarations within that context + + The ``DeclContext`` structure provides efficient name lookup for names within + that declaration context. For example, if ``N`` is a namespace we can look + for the name ``N::f`` using ``DeclContext::lookup``. The lookup itself is + based on a lazily-constructed array (for declaration contexts with a small + number of declarations) or hash table (for declaration contexts with more + declarations). The lookup operation provides the semantics-centric view of + the declarations in the context. + +Ownership of declarations + + The ``DeclContext`` owns all of the declarations that were declared within + its declaration context, and is responsible for the management of their + memory as well as their (de-)serialization. + +All declarations are stored within a declaration context, and one can query +information about the context in which each declaration lives. One can +retrieve the ``DeclContext`` that contains a particular ``Decl`` using +``Decl::getDeclContext``. However, see the section +:ref:`LexicalAndSemanticContexts` for more information about how to interpret +this context information. + +.. _Redeclarations: + +Redeclarations and Overloads +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Within a translation unit, it is common for an entity to be declared several +times. For example, we might declare a function "``f``" and then later +re-declare it as part of an inlined definition: + +.. code-block:: c++ + + void f(int x, int y, int z = 1); + + inline void f(int x, int y, int z) { /* ... */ } + +The representation of "``f``" differs in the source-centric and +semantics-centric views of a declaration context. In the source-centric view, +all redeclarations will be present, in the order they occurred in the source +code, making this view suitable for clients that wish to see the structure of +the source code. 
In the semantics-centric view, only the most recent "``f``" +will be found by the lookup, since it effectively replaces the first +declaration of "``f``". + +In the semantics-centric view, overloading of functions is represented +explicitly. For example, given two declarations of a function "``g``" that are +overloaded, e.g., + +.. code-block:: c++ + + void g(); + void g(int); + +the ``DeclContext::lookup`` operation will return a +``DeclContext::lookup_result`` that contains a range of iterators over +declarations of "``g``". Clients that perform semantic analysis on a program +that is not concerned with the actual source code will primarily use this +semantics-centric view. + +.. _LexicalAndSemanticContexts: + +Lexical and Semantic Contexts +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Each declaration has two potentially different declaration contexts: a +*lexical* context, which corresponds to the source-centric view of the +declaration context, and a *semantic* context, which corresponds to the +semantics-centric view. The lexical context is accessible via +``Decl::getLexicalDeclContext`` while the semantic context is accessible via +``Decl::getDeclContext``, both of which return ``DeclContext`` pointers. For +most declarations, the two contexts are identical. For example: + +.. code-block:: c++ + + class X { + public: + void f(int x); + }; + +Here, the semantic and lexical contexts of ``X::f`` are the ``DeclContext`` +associated with the class ``X`` (itself stored as a ``RecordDecl`` AST node). +However, we can now define ``X::f`` out-of-line: + +.. code-block:: c++ + + void X::f(int x = 17) { /* ... */ } + +This definition of "``f``" has different lexical and semantic contexts. The +lexical context corresponds to the declaration context in which the actual +declaration occurred in the source code, e.g., the translation unit containing +``X``. 
Thus, this declaration of ``X::f`` can be found by traversing the +declarations provided by [``decls_begin()``, ``decls_end()``) in the +translation unit. + +The semantic context of ``X::f`` corresponds to the class ``X``, since this +member function is (semantically) a member of ``X``. Lookup of the name ``f`` +into the ``DeclContext`` associated with ``X`` will then return the definition +of ``X::f`` (including information about the default argument). + +Transparent Declaration Contexts +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In C and C++, there are several contexts in which names that are logically +declared inside another declaration will actually "leak" out into the enclosing +scope from the perspective of name lookup. The most obvious instance of this +behavior is in enumeration types, e.g., + +.. code-block:: c++ + + enum Color { + Red, + Green, + Blue + }; + +Here, ``Color`` is an enumeration, which is a declaration context that contains +the enumerators ``Red``, ``Green``, and ``Blue``. Thus, traversing the list of +declarations contained in the enumeration ``Color`` will yield ``Red``, +``Green``, and ``Blue``. However, outside of the scope of ``Color`` one can +name the enumerator ``Red`` without qualifying the name, e.g., + +.. code-block:: c++ + + Color c = Red; + +There are other entities in C++ that provide similar behavior. For example, +linkage specifications that use curly braces: + +.. code-block:: c++ + + extern "C" { + void f(int); + void g(int); + } + // f and g are visible here + +For source-level accuracy, we treat the linkage specification and enumeration +type as a declaration context in which its enclosed declarations ("``Red``", +"``Green``", and "``Blue``"; "``f``" and "``g``") are declared. However, these +declarations are visible outside of the scope of the declaration context. 
+ +These language features (and several others, described below) have roughly the +same set of requirements: declarations are declared within a particular lexical +context, but the declarations are also found via name lookup in scopes +enclosing the declaration itself. This feature is implemented via +*transparent* declaration contexts (see +``DeclContext::isTransparentContext()``), whose declarations are visible in the +nearest enclosing non-transparent declaration context. This means that the +lexical context of the declaration (e.g., an enumerator) will be the +transparent ``DeclContext`` itself, as will the semantic context, but the +declaration will be visible in every outer context up to and including the +first non-transparent declaration context (since transparent declaration +contexts can be nested). + +The transparent ``DeclContext``\ s are: + +* Enumerations (but not C++11 "scoped enumerations"): + + .. code-block:: c++ + + enum Color { + Red, + Green, + Blue + }; + // Red, Green, and Blue are in scope + +* C++ linkage specifications: + + .. code-block:: c++ + + extern "C" { + void f(int); + void g(int); + } + // f and g are in scope + +* Anonymous unions and structs: + + .. code-block:: c++ + + struct LookupTable { + bool IsVector; + union { + std::vector<Item> *Vector; + std::set<Item> *Set; + }; + }; + + LookupTable LT; + LT.Vector = 0; // Okay: finds Vector inside the unnamed union + +* C++11 inline namespaces: + + .. code-block:: c++ + + namespace mylib { + inline namespace debug { + class X; + } + } + mylib::X *xp; // okay: mylib::X refers to mylib::debug::X + +.. _MultiDeclContext: + +Multiply-Defined Declaration Contexts +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +C++ namespaces have the interesting --- and, so far, unique --- property that +the namespace can be defined multiple times, and the declarations provided by +each namespace definition are effectively merged (from the semantic point of +view). 
For example, the following two code snippets are semantically +indistinguishable: + +.. code-block:: c++ + + // Snippet #1: + namespace N { + void f(); + } + namespace N { + void f(int); + } + + // Snippet #2: + namespace N { + void f(); + void f(int); + } + +In Clang's representation, the source-centric view of declaration contexts will +actually have two separate ``NamespaceDecl`` nodes in Snippet #1, each of which +is a declaration context that contains a single declaration of "``f``". +However, the semantics-centric view provided by name lookup into the namespace +``N`` for "``f``" will return a ``DeclContext::lookup_result`` that contains a +range of iterators over declarations of "``f``". + +``DeclContext`` manages multiply-defined declaration contexts internally. The +function ``DeclContext::getPrimaryContext`` retrieves the "primary" context for +a given ``DeclContext`` instance, which is the ``DeclContext`` responsible for +maintaining the lookup table used for the semantics-centric view. Given the +primary context, one can follow the chain of ``DeclContext`` nodes that define +additional declarations via ``DeclContext::getNextContext``. Note that these +functions are used internally within the lookup and insertion methods of the +``DeclContext``, so the vast majority of clients can ignore them. + +.. _CFG: + +The ``CFG`` class +----------------- + +The ``CFG`` class is designed to represent a source-level control-flow graph +for a single statement (``Stmt*``). Typically instances of ``CFG`` are +constructed for function bodies (usually an instance of ``CompoundStmt``), but +can also be instantiated to represent the control-flow of any class that +subclasses ``Stmt``, which includes simple expressions. Control-flow graphs +are especially useful for performing `flow- or path-sensitive +<http://en.wikipedia.org/wiki/Data_flow_analysis#Sensitivities>`_ program +analyses on a given function. 
+ +Basic Blocks +^^^^^^^^^^^^ + +Concretely, an instance of ``CFG`` is a collection of basic blocks. Each basic +block is an instance of ``CFGBlock``, which simply contains an ordered sequence +of ``Stmt*`` (each referring to statements in the AST). The ordering of +statements within a block indicates unconditional flow of control from one +statement to the next. :ref:`Conditional control-flow +<ConditionalControlFlow>` is represented using edges between basic blocks. The +statements within a given ``CFGBlock`` can be traversed using the +``CFGBlock::*iterator`` interface. + +A ``CFG`` object owns the instances of ``CFGBlock`` within the control-flow +graph it represents. Each ``CFGBlock`` within a CFG is also uniquely numbered +(accessible via ``CFGBlock::getBlockID()``). Currently the number is based on +the ordering the blocks were created, but no assumptions should be made on how +``CFGBlocks`` are numbered other than their numbers are unique and that they +are numbered from 0..N-1 (where N is the number of basic blocks in the CFG). + +Entry and Exit Blocks +^^^^^^^^^^^^^^^^^^^^^ + +Each instance of ``CFG`` contains two special blocks: an *entry* block +(accessible via ``CFG::getEntry()``), which has no incoming edges, and an +*exit* block (accessible via ``CFG::getExit()``), which has no outgoing edges. +Neither block contains any statements, and they serve the role of providing a +clear entrance and exit for a body of code such as a function body. The +presence of these empty blocks greatly simplifies the implementation of many +analyses built on top of CFGs. + +.. _ConditionalControlFlow: + +Conditional Control-Flow +^^^^^^^^^^^^^^^^^^^^^^^^ + +Conditional control-flow (such as those induced by if-statements and loops) is +represented as edges between ``CFGBlocks``. Because different C language +constructs can induce control-flow, each ``CFGBlock`` also records an extra +``Stmt*`` that represents the *terminator* of the block. 
A terminator is +simply the statement that caused the control-flow, and is used to identify the +nature of the conditional control-flow between blocks. For example, in the +case of an if-statement, the terminator refers to the ``IfStmt`` object in the +AST that represented the given branch. + +To illustrate, consider the following code example: + +.. code-block:: c++ + + int foo(int x) { + x = x + 1; + if (x > 2) + x++; + else { + x += 2; + x *= 2; + } + + return x; + } + +After invoking the parser+semantic analyzer on this code fragment, the AST of +the body of ``foo`` is referenced by a single ``Stmt*``. We can then construct +an instance of ``CFG`` representing the control-flow graph of this function +body by a single call to a static class method: + +.. code-block:: c++ + + Stmt *FooBody = ... + CFG *FooCFG = CFG::buildCFG(FooBody); + +It is the responsibility of the caller of ``CFG::buildCFG`` to ``delete`` the +returned ``CFG*`` when the CFG is no longer needed. + +Along with providing an interface to iterate over its ``CFGBlocks``, the +``CFG`` class also provides methods that are useful for debugging and +visualizing CFGs. For example, the method ``CFG::dump()`` dumps a +pretty-printed version of the CFG to standard error. This is especially useful +when one is using a debugger such as gdb. For example, here is the output of +``FooCFG->dump()``: + +.. 
code-block:: c++ + + [ B5 (ENTRY) ] + Predecessors (0): + Successors (1): B4 + + [ B4 ] + 1: x = x + 1 + 2: (x > 2) + T: if [B4.2] + Predecessors (1): B5 + Successors (2): B3 B2 + + [ B3 ] + 1: x++ + Predecessors (1): B4 + Successors (1): B1 + + [ B2 ] + 1: x += 2 + 2: x *= 2 + Predecessors (1): B4 + Successors (1): B1 + + [ B1 ] + 1: return x; + Predecessors (2): B2 B3 + Successors (1): B0 + + [ B0 (EXIT) ] + Predecessors (1): B1 + Successors (0): + +For each block, the pretty-printed output displays the number of +*predecessor* blocks (blocks that have outgoing control-flow to the given +block) and *successor* blocks (blocks that have incoming +control-flow from the given block). We can also clearly see the special entry +and exit blocks at the beginning and end of the pretty-printed output. For the +entry block (block B5), the number of predecessor blocks is 0, while for the +exit block (block B0) the number of successor blocks is 0. + +The most interesting block here is B4, whose outgoing control-flow represents +the branching caused by the sole if-statement in ``foo``. Of particular +interest is the second statement in the block, ``(x > 2)``, and the terminator, +printed as ``if [B4.2]``. The second statement represents the evaluation of +the condition of the if-statement, which occurs before the actual branching of +control-flow. Within the ``CFGBlock`` for B4, the ``Stmt*`` for the second +statement refers to the actual expression in the AST for ``(x > 2)``. Thus +pointers to subclasses of ``Expr`` can appear in the list of statements in a +block, and not just subclasses of ``Stmt`` that refer to proper C statements. + +The terminator of block B4 is a pointer to the ``IfStmt`` object in the AST. 
+The pretty-printer outputs ``if [B4.2]`` because the condition expression of +the if-statement has an actual place in the basic block, and thus the +terminator is essentially *referring* to the expression that is the second +statement of block B4 (i.e., B4.2). In this manner, conditions for +control-flow (which also includes conditions for loops and switch statements) +are hoisted into the actual basic block. + +.. Implicit Control-Flow +.. ^^^^^^^^^^^^^^^^^^^^^ + +.. A key design principle of the ``CFG`` class was to not require any +.. transformations to the AST in order to represent control-flow. Thus the +.. ``CFG`` does not perform any "lowering" of the statements in an AST: loops +.. are not transformed into guarded gotos, short-circuit operations are not +.. converted to a set of if-statements, and so on. + +Constant Folding in the Clang AST +--------------------------------- + +There are several places where constants and constant folding matter a lot to +the Clang front-end. First, in general, we prefer the AST to retain the source +code as close to how the user wrote it as possible. This means that if they +wrote "``5+4``", we want to keep the addition and two constants in the AST, we +don't want to fold to "``9``". This means that constant folding in various +ways turns into a tree walk that needs to handle the various cases. + +However, there are places in both C and C++ that require constants to be +folded. For example, the C standard defines what an "integer constant +expression" (i-c-e) is with very precise and specific requirements. The +language then requires i-c-e's in a lot of places (for example, the size of a +bitfield, the value for a case statement, etc). For these, we have to be able +to constant fold the constants, to do semantic checks (e.g., verify bitfield +size is non-negative and that case statements aren't duplicated). 
We aim for +Clang to be very pedantic about this, diagnosing cases when the code does not +use an i-c-e where one is required, but accepting the code unless running with +``-pedantic-errors``. + +Things get a little bit more tricky when it comes to compatibility with +real-world source code. Specifically, GCC has historically accepted a huge +superset of expressions as i-c-e's, and a lot of real world code depends on +this unfortunate accident of history (including, e.g., the glibc system +headers). GCC accepts anything its "fold" optimizer is capable of reducing to +an integer constant, which means that the definition of what it accepts changes +as its optimizer does. One example is that GCC accepts things like "``case +X-X:``" even when ``X`` is a variable, because it can fold this to 0. + +Another issue is how constants interact with the extensions we support, such +as ``__builtin_constant_p``, ``__builtin_inf``, ``__extension__`` and many +others. C99 obviously does not specify the semantics of any of these +extensions, and the definition of i-c-e does not include them. However, these +extensions are often used in real code, and we have to have a way to reason +about them. + +Finally, this is not just a problem for semantic analysis. The code generator +and other clients have to be able to fold constants (e.g., to initialize global +variables) and have to handle a superset of what C99 allows. Further, these +clients can benefit from extended information. For example, we know that +"``foo() || 1``" always evaluates to ``true``, but we can't replace the +expression with ``true`` because it has side effects. + +Implementation Approach +^^^^^^^^^^^^^^^^^^^^^^^ + +After trying several different approaches, we've finally converged on a design +(Note, at the time of this writing, not all of this has been implemented, +consider this a design goal!). 
Our basic approach is to define a single +recursive evaluation method (``Expr::Evaluate``), which is implemented +in ``AST/ExprConstant.cpp``. Given an expression with "scalar" type (integer, +fp, complex, or pointer) this method returns the following information: + +* Whether the expression is an integer constant expression, a general constant + that was folded but has no side effects, a general constant that was folded + but that does have side effects, or an uncomputable/unfoldable value. +* If the expression was computable in any way, this method returns the + ``APValue`` for the result of the expression. +* If the expression is not evaluatable at all, this method returns information + on one of the problems with the expression. This includes a + ``SourceLocation`` for where the problem is, and a diagnostic ID that explains + the problem. The diagnostic should have ``ERROR`` type. +* If the expression is not an integer constant expression, this method returns + information on one of the problems with the expression. This includes a + ``SourceLocation`` for where the problem is, and a diagnostic ID that + explains the problem. The diagnostic should have ``EXTENSION`` type. + +This information gives various clients the flexibility that they want, and we +will eventually have some helper methods for various extensions. For example, +``Sema`` should have a ``Sema::VerifyIntegerConstantExpression`` method, which +calls ``Evaluate`` on the expression. If the expression is not foldable, the +error is emitted, and it would return ``true``. If the expression is not an +i-c-e, the ``EXTENSION`` diagnostic is emitted. Finally it would return +``false`` to indicate that the AST is OK. + +Other clients can use the information in other ways, for example, codegen can +just use expressions that are foldable in any way. 
+ +Extensions +^^^^^^^^^^ + +This section describes how some of the various extensions Clang supports +interact with constant evaluation: + +* ``__extension__``: The expression form of this extension causes any + evaluatable subexpression to be accepted as an integer constant expression. +* ``__builtin_constant_p``: This returns true (as an integer constant + expression) if the operand evaluates to either a numeric value (that is, not + a pointer cast to integral type) of integral, enumeration, floating or + complex type, or if it evaluates to the address of the first character of a + string literal (possibly cast to some other type). As a special case, if + ``__builtin_constant_p`` is the (potentially parenthesized) condition of a + conditional operator expression ("``?:``"), only the true side of the + conditional operator is considered, and it is evaluated with full constant + folding. +* ``__builtin_choose_expr``: The condition is required to be an integer + constant expression, but we accept any constant as an "extension of an + extension". This only evaluates one operand depending on which way the + condition evaluates. +* ``__builtin_classify_type``: This always returns an integer constant + expression. +* ``__builtin_inf, nan, ...``: These are treated just like a floating-point + literal. +* ``__builtin_abs, copysign, ...``: These are constant folded as general + constant expressions. +* ``__builtin_strlen`` and ``strlen``: These are constant folded as integer + constant expressions if the argument is a string literal. + +How to change Clang +=================== + +How to add an attribute +----------------------- + +To add an attribute, you'll have to add it to the list of attributes, add it to +the parsing phase, and look for it in the AST scan. +`r124217 <http://llvm.org/viewvc/llvm-project?view=rev&revision=124217>`_ +has a good example of adding a warning attribute. 
+ +(Beware that this hasn't been reviewed/fixed by the people who designed the +attributes system yet.) + + +``include/clang/Basic/Attr.td`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +First, add your attribute to the `include/clang/Basic/Attr.td file +<http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Attr.td?view=markup>`_. + +Each attribute gets a ``def`` inheriting from ``Attr`` or one of its +subclasses. ``InheritableAttr`` means that the attribute also applies to +subsequent declarations of the same name. + +``Spellings`` lists the strings that can appear in ``__attribute__((here))`` or +``[[here]]``. All such strings will be synonymous. If you want to allow the +``[[]]`` C++11 syntax, you have to define a list of ``Namespaces``, which will +let users write ``[[namespace::spelling]]``. Using the empty string for a +namespace will allow users to write just the spelling with no "``::``". +Attributes which g++-4.8 accepts should also have a +``CXX11<"gnu", "spelling">`` spelling. + +``Subjects`` restricts what kinds of AST node to which this attribute can +appertain (roughly, attach). + +``Args`` names the arguments the attribute takes, in order. If ``Args`` is +``[StringArgument<"Arg1">, IntArgument<"Arg2">]`` then +``__attribute__((myattribute("Hello", 3)))`` will be a valid use. + +Boilerplate +^^^^^^^^^^^ + +Write a new ``HandleYourAttr()`` function in `lib/Sema/SemaDeclAttr.cpp +<http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDeclAttr.cpp?view=markup>`_, +and add a case to the switch in ``ProcessNonInheritableDeclAttr()`` or +``ProcessInheritableDeclAttr()`` forwarding to it. + +If your attribute causes extra warnings to fire, define a ``DiagGroup`` in +`include/clang/Basic/DiagnosticGroups.td +<http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticGroups.td?view=markup>`_ +named after the attribute's ``Spelling`` with "_"s replaced by "-"s. 
If you're +only defining one diagnostic, you can skip ``DiagnosticGroups.td`` and use +``InGroup<DiagGroup<"your-attribute">>`` directly in `DiagnosticSemaKinds.td +<http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td?view=markup>`_. + +The meat of your attribute +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Find an appropriate place in Clang to do whatever your attribute needs to do. +Check for the attribute's presence using ``Decl::getAttr<YourAttr>()``. + +Update the :doc:`LanguageExtensions` document to describe your new attribute. + +How to add an expression or statement +------------------------------------- + +Expressions and statements are one of the most fundamental constructs within a +compiler, because they interact with many different parts of the AST, semantic +analysis, and IR generation. Therefore, adding a new expression or statement +kind into Clang requires some care. The following list details the various +places in Clang where an expression or statement needs to be introduced, along +with patterns to follow to ensure that the new expression or statement works +well across all of the C languages. We focus on expressions, but statements +are similar. + +#. Introduce parsing actions into the parser. Recursive-descent parsing is + mostly self-explanatory, but there are a few things that are worth keeping + in mind: + + * Keep as much source location information as possible! You'll want it later + to produce great diagnostics and support Clang's various features that map + between source code and the AST. + * Write tests for all of the "bad" parsing cases, to make sure your recovery + is good. If you have matched delimiters (e.g., parentheses, square + brackets, etc.), use ``Parser::BalancedDelimiterTracker`` to give nice + diagnostics when things go wrong. + +#. Introduce semantic analysis actions into ``Sema``. 
Semantic analysis should + always involve two functions: an ``ActOnXXX`` function that will be called + directly from the parser, and a ``BuildXXX`` function that performs the + actual semantic analysis and will (eventually!) build the AST node. It's + fairly common for the ``ActOnXXX`` function to do very little (often just + some minor translation from the parser's representation to ``Sema``'s + representation of the same thing), but the separation is still important: + C++ template instantiation, for example, should always call the ``BuildXXX`` + variant. Several notes on semantic analysis before we get into construction + of the AST: + + * Your expression probably involves some types and some subexpressions. + Make sure to fully check that those types, and the types of those + subexpressions, meet your expectations. Add implicit conversions where + necessary to make sure that all of the types line up exactly the way you + want them. Write extensive tests to check that you're getting good + diagnostics for mistakes and that you can use various forms of + subexpressions with your expression. + * When type-checking a type or subexpression, make sure to first check + whether the type is "dependent" (``Type::isDependentType()``) or whether a + subexpression is type-dependent (``Expr::isTypeDependent()``). If any of + these return ``true``, then you're inside a template and you can't do much + type-checking now. That's normal, and your AST node (when you get there) + will have to deal with this case. At this point, you can write tests that + use your expression within templates, but don't try to instantiate the + templates. + * For each subexpression, be sure to call ``Sema::CheckPlaceholderExpr()`` + to deal with "weird" expressions that don't behave well as subexpressions. 
+ Then, determine whether you need to perform lvalue-to-rvalue conversions + (``Sema::DefaultLvalueConversions``) or the usual unary conversions + (``Sema::UsualUnaryConversions``), for places where the subexpression is + producing a value you intend to use. + * Your ``BuildXXX`` function will probably just return ``ExprError()`` at + this point, since you don't have an AST. That's perfectly fine, and + shouldn't impact your testing. + +#. Introduce an AST node for your new expression. This starts with declaring + the node in ``include/Basic/StmtNodes.td`` and creating a new class for your + expression in the appropriate ``include/AST/Expr*.h`` header. It's best to + look at the class for a similar expression to get ideas, and there are some + specific things to watch for: + + * If you need to allocate memory, use the ``ASTContext`` allocator to + allocate memory. Never use raw ``malloc`` or ``new``, and never hold any + resources in an AST node, because the destructor of an AST node is never + called. + * Make sure that ``getSourceRange()`` covers the exact source range of your + expression. This is needed for diagnostics and for IDE support. + * Make sure that ``children()`` visits all of the subexpressions. This is + important for a number of features (e.g., IDE support, C++ variadic + templates). If you have sub-types, you'll also need to visit those + sub-types in the ``RecursiveASTVisitor``. + * Add printing support (``StmtPrinter.cpp``) and dumping support + (``StmtDumper.cpp``) for your expression. + * Add profiling support (``StmtProfile.cpp``) for your AST node, noting the + distinguishing (non-source location) characteristics of an instance of + your expression. Omitting this step will lead to hard-to-diagnose + failures regarding matching of template declarations. + +#. Teach semantic analysis to build your AST node. At this point, you can wire + up your ``Sema::BuildXXX`` function to actually create your AST. 
A few + things to check at this point: + + * If your expression can construct a new C++ class or return a new + Objective-C object, be sure to update and then call + ``Sema::MaybeBindToTemporary`` for your just-created AST node to be sure + that the object gets properly destructed. An easy way to test this is to + return a C++ class with a private destructor: semantic analysis should + flag an error here with the attempt to call the destructor. + * Inspect the generated AST by printing it using ``clang -cc1 -ast-print``, + to make sure you're capturing all of the important information about how + the AST was written. + * Inspect the generated AST under ``clang -cc1 -ast-dump`` to verify that + all of the types in the generated AST line up the way you want them. + Remember that clients of the AST should never have to "think" to + understand what's going on. For example, all implicit conversions should + show up explicitly in the AST. + * Write tests that use your expression as a subexpression of other, + well-known expressions. Can you call a function using your expression as + an argument? Can you use the ternary operator? + +#. Teach code generation to create IR for your AST node. This step is the first + (and only) that requires knowledge of LLVM IR. There are several things to + keep in mind: + + * Code generation is separated into scalar/aggregate/complex and + lvalue/rvalue paths, depending on what kind of result your expression + produces. On occasion, this requires some careful factoring of code to + avoid duplication. + * ``CodeGenFunction`` contains functions ``ConvertType`` and + ``ConvertTypeForMem`` that convert Clang's types (``clang::Type*`` or + ``clang::QualType``) to LLVM types. Use the former for values, and the + latter for memory locations: test with the C++ "``bool``" type to check + this. If you find that you are having to use LLVM bitcasts to make the + subexpressions of your expression have the type that your expression + expects, STOP! 
Go fix semantic analysis and the AST so that you don't + need these bitcasts. + * The ``CodeGenFunction`` class has a number of helper functions to make + certain operations easy, such as generating code to produce an lvalue or + an rvalue, or to initialize a memory location with a given value. Prefer + to use these functions rather than directly writing loads and stores, + because these functions take care of some of the tricky details for you + (e.g., for exceptions). + * If your expression requires some special behavior in the event of an + exception, look at the ``push*Cleanup`` functions in ``CodeGenFunction`` + to introduce a cleanup. You shouldn't have to deal with + exception-handling directly. + * Testing is extremely important in IR generation. Use ``clang -cc1 + -emit-llvm`` and `FileCheck + <http://llvm.org/docs/CommandGuide/FileCheck.html>`_ to verify that you're + generating the right IR. + +#. Teach template instantiation how to cope with your AST node, which requires + some fairly simple code: + + * Make sure that your expression's constructor properly computes the flags + for type dependence (i.e., the type your expression produces can change + from one instantiation to the next), value dependence (i.e., the constant + value your expression produces can change from one instantiation to the + next), instantiation dependence (i.e., a template parameter occurs + anywhere in your expression), and whether your expression contains a + parameter pack (for variadic templates). Often, computing these flags + just means combining the results from the various types and + subexpressions. + * Add ``TransformXXX`` and ``RebuildXXX`` functions to the ``TreeTransform`` + class template in ``Sema``. ``TransformXXX`` should (recursively) + transform all of the subexpressions and types within your expression, + using ``getDerived().TransformYYY``. 
If all of the subexpressions and + types transform without error, it will then call the ``RebuildXXX`` + function, which will in turn call ``getSema().BuildXXX`` to perform + semantic analysis and build your expression. + * To test template instantiation, take those tests you wrote to make sure + that you were type checking with type-dependent expressions and dependent + types (from step #2) and instantiate those templates with various types, + some of which type-check and some that don't, and test the error messages + in each case. + +#. There are some "extras" that make other features work better. It's worth + handling these extras to give your expression complete integration into + Clang: + + * Add code completion support for your expression in + ``SemaCodeComplete.cpp``. + * If your expression has types in it, or has any "interesting" features + other than subexpressions, extend libclang's ``CursorVisitor`` to provide + proper visitation for your expression, enabling various IDE features such + as syntax highlighting, cross-referencing, and so on. The + ``c-index-test`` helper program can be used to test these features. + diff --git a/docs/IntroductionToTheClangAST.html b/docs/IntroductionToTheClangAST.html deleted file mode 100644 index 28175dd..0000000 --- a/docs/IntroductionToTheClangAST.html +++ /dev/null @@ -1,139 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> -<title>Introduction to the Clang AST</title> -<link type="text/css" rel="stylesheet" href="../menu.css" /> -<link type="text/css" rel="stylesheet" href="../content.css" /> -</head> -<body> - -<!--#include virtual="../menu.html.incl"--> - -<div id="content"> - -<h1>Introduction to the Clang AST</h1> -<p>This document gives a gentle introduction to the mysteries of the Clang AST. 
-It is targeted at developers who either want to contribute to Clang, or use -tools that work based on Clang's AST, like the AST matchers.</p> -<!-- FIXME: Add link once we have an AST matcher document --> - -<!-- ======================================================================= --> -<h2 id="intro">Introduction</h2> -<!-- ======================================================================= --> - -<p>Clang's AST is different from ASTs produced by some other compilers in that it closely -resembles both the written C++ code and the C++ standard. For example, -parenthesis expressions and compile time constants are available in an unreduced -form in the AST. This makes Clang's AST a good fit for refactoring tools.</p> - -<p>Documentation for all Clang AST nodes is available via the generated -<a href="http://clang.llvm.org/doxygen">Doxygen</a>. The doxygen online -documentation is also indexed by your favorite search engine, which will make -a search for clang and the AST node's class name usually turn up the doxygen -of the class you're looking for (for example, search for: clang ParenExpr).</p> - -<!-- ======================================================================= --> -<h2 id="examine">Examining the AST</h2> -<!-- ======================================================================= --> - -<p>A good way to familarize yourself with the Clang AST is to actually look -at it on some simple example code. Clang has a builtin AST-dump modes, which -can be enabled with the flags -ast-dump and -ast-dump-xml. Note that -ast-dump-xml -currently only works with debug-builds of clang.</p> - -<p>Let's look at a simple example AST:</p> -<pre> -# cat test.cc -int f(int x) { - int result = (x / 42); - return result; -} - -# Clang by default is a frontend for many tools; -cc1 tells it to directly -# use the C++ compiler mode. -undef leaves out some internal declarations. -$ clang -cc1 -undef -ast-dump-xml test.cc -... cutting out internal declarations of clang ... 
-<TranslationUnit ptr="0x4871160"> - <Function ptr="0x48a5800" name="f" prototype="true"> - <FunctionProtoType ptr="0x4871de0" canonical="0x4871de0"> - <BuiltinType ptr="0x4871250" canonical="0x4871250"/> - <parameters> - <BuiltinType ptr="0x4871250" canonical="0x4871250"/> - </parameters> - </FunctionProtoType> - <ParmVar ptr="0x4871d80" name="x" initstyle="c"> - <BuiltinType ptr="0x4871250" canonical="0x4871250"/> - </ParmVar> - <Stmt> -(CompoundStmt 0x48a5a38 <t2.cc:1:14, line:4:1> - (DeclStmt 0x48a59c0 <line:2:3, col:24> - 0x48a58c0 "int result = - (ParenExpr 0x48a59a0 <col:16, col:23> 'int' - (BinaryOperator 0x48a5978 <col:17, col:21> 'int' '/' - (ImplicitCastExpr 0x48a5960 <col:17> 'int' <LValueToRValue> - (DeclRefExpr 0x48a5918 <col:17> 'int' lvalue ParmVar 0x4871d80 'x' 'int')) - (IntegerLiteral 0x48a5940 <col:21> 'int' 42)))") - (ReturnStmt 0x48a5a18 <line:3:3, col:10> - (ImplicitCastExpr 0x48a5a00 <col:10> 'int' <LValueToRValue> - (DeclRefExpr 0x48a59d8 <col:10> 'int' lvalue Var 0x48a58c0 'result' 'int')))) - - </Stmt> - </Function> -</TranslationUnit> -</pre> -<p>In general, -ast-dump-xml dumps declarations in an XML-style format and -statements in an S-expression-style format. -The toplevel declaration in a translation unit is always the -<a href="http://clang.llvm.org/doxygen/classclang_1_1TranslationUnitDecl.html">translation unit declaration</a>. -In this example, our first user written declaration is the -<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">function declaration</a> -of 'f'. 
The body of 'f' is a <a href="http://clang.llvm.org/doxygen/classclang_1_1CompoundStmt.html">compound statement</a>, -whose child nodes are a <a href="http://clang.llvm.org/doxygen/classclang_1_1DeclStmt.html">declaration statement</a> -that declares our result variable, and the -<a href="http://clang.llvm.org/doxygen/classclang_1_1ReturnStmt.html">return statement</a>.</p> - -<!-- ======================================================================= --> -<h2 id="context">AST Context</h2> -<!-- ======================================================================= --> - -<p>All information about the AST for a translation unit is bundled up in the class -<a href="http://clang.llvm.org/doxygen/classclang_1_1ASTContext.html">ASTContext</a>. -It allows traversal of the whole translation unit starting from -<a href="http://clang.llvm.org/doxygen/classclang_1_1ASTContext.html#abd909fb01ef10cfd0244832a67b1dd64">getTranslationUnitDecl</a>, -or to access Clang's <a href="http://clang.llvm.org/doxygen/classclang_1_1ASTContext.html#a4f95adb9958e22fbe55212ae6482feb4">table of identifiers</a> -for the parsed translation unit.</p> - -<!-- ======================================================================= --> -<h2 id="nodes">AST Nodes</h2> -<!-- ======================================================================= --> - -<p>Clang's AST nodes are modeled on a class hierarchy that does not have a common -ancestor. Instead, there are multiple larger hierarchies for basic node types like -<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a> and -<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>. 
Many -important AST nodes derive from <a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>, -<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>, -<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclContext.html">DeclContext</a> or -<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>, -with some classes deriving from both Decl and DeclContext.</p> -<p>There are also a multitude of nodes in the AST that are not part of a -larger hierarchy, and are only reachable from specific other nodes, -like <a href="http://clang.llvm.org/doxygen/classclang_1_1CXXBaseSpecifier.html">CXXBaseSpecifier</a>. -</p> - -<p>Thus, to traverse the full AST, one starts from the <a href="http://clang.llvm.org/doxygen/classclang_1_1TranslationUnitDecl.html">TranslationUnitDecl</a> -and then recursively traverses everything that can be reached from that node -- this information has to be encoded for each specific node type. This algorithm -is encoded in the <a href="http://clang.llvm.org/doxygen/classclang_1_1RecursiveASTVisitor.html">RecursiveASTVisitor</a>. -See the <a href="http://clang.llvm.org/docs/RAVFrontendAction.html">RecursiveASTVisitor tutorial</a>.</p> - -<p>The two most basic nodes in the Clang AST are statements (<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>) -and declarations (<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>). -Note that expressions (<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>) -are also statements in Clang's AST.</p> - -</div> -</body> -</html> - diff --git a/docs/IntroductionToTheClangAST.rst b/docs/IntroductionToTheClangAST.rst new file mode 100644 index 0000000..81eb7ed --- /dev/null +++ b/docs/IntroductionToTheClangAST.rst @@ -0,0 +1,135 @@ +============================= +Introduction to the Clang AST +============================= + +This document gives a gentle introduction to the mysteries of the Clang +AST. 
It is targeted at developers who either want to contribute to +Clang, or use tools that work based on Clang's AST, like the AST +matchers. + +Introduction +============ + +Clang's AST is different from ASTs produced by some other compilers in +that it closely resembles both the written C++ code and the C++ +standard. For example, parenthesis expressions and compile time +constants are available in an unreduced form in the AST. This makes +Clang's AST a good fit for refactoring tools. + +Documentation for all Clang AST nodes is available via the generated +`Doxygen <http://clang.llvm.org/doxygen>`_. The doxygen online +documentation is also indexed by your favorite search engine, which will +make a search for clang and the AST node's class name usually turn up +the doxygen of the class you're looking for (for example, search for: +clang ParenExpr). + +Examining the AST +================= + +A good way to familiarize yourself with the Clang AST is to actually look +at it on some simple example code. Clang has builtin AST-dump modes, +which can be enabled with the flags ``-ast-dump`` and ``-ast-dump-xml``. Note +that ``-ast-dump-xml`` currently only works with debug builds of clang. + +Let's look at a simple example AST: + +:: + + $ cat test.cc + int f(int x) { + int result = (x / 42); + return result; + } + + # Clang by default is a frontend for many tools; -cc1 tells it to directly + # use the C++ compiler mode. -undef leaves out some internal declarations. + $ clang -cc1 -undef -ast-dump-xml test.cc + ... cutting out internal declarations of clang ... 
+ <TranslationUnit ptr="0x4871160"> + <Function ptr="0x48a5800" name="f" prototype="true"> + <FunctionProtoType ptr="0x4871de0" canonical="0x4871de0"> + <BuiltinType ptr="0x4871250" canonical="0x4871250"/> + <parameters> + <BuiltinType ptr="0x4871250" canonical="0x4871250"/> + </parameters> + </FunctionProtoType> + <ParmVar ptr="0x4871d80" name="x" initstyle="c"> + <BuiltinType ptr="0x4871250" canonical="0x4871250"/> + </ParmVar> + <Stmt> + (CompoundStmt 0x48a5a38 <t2.cc:1:14, line:4:1> + (DeclStmt 0x48a59c0 <line:2:3, col:24> + 0x48a58c0 "int result = + (ParenExpr 0x48a59a0 <col:16, col:23> 'int' + (BinaryOperator 0x48a5978 <col:17, col:21> 'int' '/' + (ImplicitCastExpr 0x48a5960 <col:17> 'int' <LValueToRValue> + (DeclRefExpr 0x48a5918 <col:17> 'int' lvalue ParmVar 0x4871d80 'x' 'int')) + (IntegerLiteral 0x48a5940 <col:21> 'int' 42)))") + (ReturnStmt 0x48a5a18 <line:3:3, col:10> + (ImplicitCastExpr 0x48a5a00 <col:10> 'int' <LValueToRValue> + (DeclRefExpr 0x48a59d8 <col:10> 'int' lvalue Var 0x48a58c0 'result' 'int')))) + + </Stmt> + </Function> + </TranslationUnit> + +In general, ``-ast-dump-xml`` dumps declarations in an XML-style format and +statements in an S-expression-style format. The toplevel declaration in +a translation unit is always the `translation unit +declaration <http://clang.llvm.org/doxygen/classclang_1_1TranslationUnitDecl.html>`_. +In this example, our first user written declaration is the `function +declaration <http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html>`_ +of "``f``". The body of "``f``" is a `compound +statement <http://clang.llvm.org/doxygen/classclang_1_1CompoundStmt.html>`_, +whose child nodes are a `declaration +statement <http://clang.llvm.org/doxygen/classclang_1_1DeclStmt.html>`_ +that declares our result variable, and the `return +statement <http://clang.llvm.org/doxygen/classclang_1_1ReturnStmt.html>`_. 
+ +AST Context +=========== + +All information about the AST for a translation unit is bundled up in +the class +`ASTContext <http://clang.llvm.org/doxygen/classclang_1_1ASTContext.html>`_. +It allows traversal of the whole translation unit starting from +`getTranslationUnitDecl <http://clang.llvm.org/doxygen/classclang_1_1ASTContext.html#abd909fb01ef10cfd0244832a67b1dd64>`_, +or to access Clang's `table of +identifiers <http://clang.llvm.org/doxygen/classclang_1_1ASTContext.html#a4f95adb9958e22fbe55212ae6482feb4>`_ +for the parsed translation unit. + +AST Nodes +========= + +Clang's AST nodes are modeled on a class hierarchy that does not have a +common ancestor. Instead, there are multiple larger hierarchies for +basic node types like +`Decl <http://clang.llvm.org/doxygen/classclang_1_1Decl.html>`_ and +`Stmt <http://clang.llvm.org/doxygen/classclang_1_1Stmt.html>`_. Many +important AST nodes derive from +`Type <http://clang.llvm.org/doxygen/classclang_1_1Type.html>`_, +`Decl <http://clang.llvm.org/doxygen/classclang_1_1Decl.html>`_, +`DeclContext <http://clang.llvm.org/doxygen/classclang_1_1DeclContext.html>`_ +or `Stmt <http://clang.llvm.org/doxygen/classclang_1_1Stmt.html>`_, with +some classes deriving from both Decl and DeclContext. + +There are also a multitude of nodes in the AST that are not part of a +larger hierarchy, and are only reachable from specific other nodes, like +`CXXBaseSpecifier <http://clang.llvm.org/doxygen/classclang_1_1CXXBaseSpecifier.html>`_. + +Thus, to traverse the full AST, one starts from the +`TranslationUnitDecl <http://clang.llvm.org/doxygen/classclang_1_1TranslationUnitDecl.html>`_ +and then recursively traverses everything that can be reached from that +node - this information has to be encoded for each specific node type. +This algorithm is encoded in the +`RecursiveASTVisitor <http://clang.llvm.org/doxygen/classclang_1_1RecursiveASTVisitor.html>`_. 
+See the `RecursiveASTVisitor +tutorial <http://clang.llvm.org/docs/RAVFrontendAction.html>`_. + +The two most basic nodes in the Clang AST are statements +(`Stmt <http://clang.llvm.org/doxygen/classclang_1_1Stmt.html>`_) and +declarations +(`Decl <http://clang.llvm.org/doxygen/classclang_1_1Decl.html>`_). Note +that expressions +(`Expr <http://clang.llvm.org/doxygen/classclang_1_1Expr.html>`_) are +also statements in Clang's AST. diff --git a/docs/JSONCompilationDatabase.html b/docs/JSONCompilationDatabase.html deleted file mode 100644 index 2907194..0000000 --- a/docs/JSONCompilationDatabase.html +++ /dev/null @@ -1,89 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> -<title>JSON Compilation Database Format Specification</title> -<link type="text/css" rel="stylesheet" href="../menu.css"> -<link type="text/css" rel="stylesheet" href="../content.css"> -</head> -<body> - -<!--#include virtual="../menu.html.incl"--> - -<div id="content"> - -<h1>JSON Compilation Database Format Specification</h1> -<p>This document describes a format for specifying how to replay -single compilations independently of the build system.</p> - -<h2>Background</h2> -<p>Tools based on the C++ Abstract Syntax Tree need full information how to -parse a translation unit. Usually this information is implicitly -available in the build system, but running tools as part of -the build system is not necessarily the best solution: -<ul> -<li>Build systems are inherently change driven, so running multiple -tools over the same code base without changing the code does not fit -into the architecture of many build systems.</li> -<li>Figuring out whether things have changed is often an IO bound -process; this makes it hard to build low latency end user tools based -on the build system.</li> -<li>Build systems are inherently sequential in the build graph, for example -due to generated source code. 
While tools that run independently of the -build still need the generated source code to exist, running tools multiple -times over unchanging source does not require serialization of the runs -according to the build dependency graph.</li> -</ul> -</p> - -<h2>Supported Systems</h2> -<p>Currently <a href="http://cmake.org">CMake</a> (since 2.8.5) supports generation of compilation -databases for Unix Makefile builds (Ninja builds in the works) with the option -CMAKE_EXPORT_COMPILE_COMMANDS.</p> -<p>Clang's tooling interface supports reading compilation databases; see -the <a href="LibTooling.html">LibTooling documentation</a>. libclang and its -python bindings also support this (since clang 3.2); see -<a href="/doxygen/group__COMPILATIONDB.html">CXCompilationDatabase.h</a>.</p> - -<h2>Format</h2> -<p>A compilation database is a JSON file, which consist of an array of -"command objects", where each command object specifies one way a translation unit -is compiled in the project.</p> -<p>Each command object contains the translation unit's main file, the working -directory of the compile run and the actual compile command.</p> -<p>Example: -<pre> -[ - { "directory": "/home/user/llvm/build", - "command": "/usr/bin/clang++ -Irelative -DSOMEDEF='\"With spaces and quotes.\"' -c -o file.o file.cc", - "file": "file.cc" }, - ... -] -</pre> -The contracts for each field in the command object are: -<ul> -<li><b>directory:</b> The working directory of the compilation. All paths specified -in the <b>command</b> or <b>file</b> fields must be either absolute or relative to -this directory.</li> -<li><b>file:</b> The main translation unit source processed by this compilation step. -This is used by tools as the key into the compilation database. There can be multiple -command objects for the same file, for example if the same source file is -compiled with different configurations.</li> -<li><b>command:</b> The compile command executed. 
After JSON unescaping, this must -be a valid command to rerun the exact compilation step for the translation unit in -the environment the build system uses. Parameters use shell quoting and shell escaping -of quotes, with '"' and '\' being the only special characters. Shell expansion is -not supported.</li> -</ul> -</p> - -<h2>Build System Integration</h2> -<p>The convention is to name the file compile_commands.json and put it at the top -of the build directory. Clang tools are pointed to the top of the build directory -to detect the file and use the compilation database to parse C++ code in the source -tree.</p> - -</div> -</body> -</html> - diff --git a/docs/JSONCompilationDatabase.rst b/docs/JSONCompilationDatabase.rst new file mode 100644 index 0000000..926dcba --- /dev/null +++ b/docs/JSONCompilationDatabase.rst @@ -0,0 +1,88 @@ +============================================== +JSON Compilation Database Format Specification +============================================== + +This document describes a format for specifying how to replay single +compilations independently of the build system. + +Background +========== + +Tools based on the C++ Abstract Syntax Tree need full information how to +parse a translation unit. Usually this information is implicitly +available in the build system, but running tools as part of the build +system is not necessarily the best solution: + +- Build systems are inherently change driven, so running multiple tools + over the same code base without changing the code does not fit into + the architecture of many build systems. +- Figuring out whether things have changed is often an IO bound + process; this makes it hard to build low latency end user tools based + on the build system. +- Build systems are inherently sequential in the build graph, for + example due to generated source code. 
While tools that run + independently of the build still need the generated source code to + exist, running tools multiple times over unchanging source does not + require serialization of the runs according to the build dependency + graph. + +Supported Systems +================= + +Currently `CMake <http://cmake.org>`_ (since 2.8.5) supports generation +of compilation databases for Unix Makefile builds (Ninja builds in the +works) with the option ``CMAKE_EXPORT_COMPILE_COMMANDS``. + +For projects on Linux, there is an alternative to intercept compiler +calls with a tool called `Bear <https://github.com/rizsotto/Bear>`_. + +Clang's tooling interface supports reading compilation databases; see +the :doc:`LibTooling documentation <LibTooling>`. libclang and its +python bindings also support this (since clang 3.2); see +`CXCompilationDatabase.h </doxygen/group__COMPILATIONDB.html>`_. + +Format +====== + +A compilation database is a JSON file, which consists of an array of +"command objects", where each command object specifies one way a +translation unit is compiled in the project. + +Each command object contains the translation unit's main file, the +working directory of the compile run and the actual compile command. + +Example: + +:: + + [ + { "directory": "/home/user/llvm/build", + "command": "/usr/bin/clang++ -Irelative -DSOMEDEF=\"With spaces, quotes and \\-es.\" -c -o file.o file.cc", + "file": "file.cc" }, + ... + ] + +The contracts for each field in the command object are: + +- **directory:** The working directory of the compilation. All paths + specified in the **command** or **file** fields must be either + absolute or relative to this directory. +- **file:** The main translation unit source processed by this + compilation step. This is used by tools as the key into the + compilation database. There can be multiple command objects for the + same file, for example if the same source file is compiled with + different configurations. 
+- **command:** The compile command executed. After JSON unescaping, + this must be a valid command to rerun the exact compilation step for + the translation unit in the environment the build system uses. + Parameters use shell quoting and shell escaping of quotes, with '``"``' + and '``\``' being the only special characters. Shell expansion is not + supported. + +Build System Integration +======================== + +The convention is to name the file compile\_commands.json and put it at +the top of the build directory. Clang tools are pointed to the top of +the build directory to detect the file and use the compilation database +to parse C++ code in the source tree. diff --git a/docs/LanguageExtensions.html b/docs/LanguageExtensions.html deleted file mode 100644 index 8c0e5b7..0000000 --- a/docs/LanguageExtensions.html +++ /dev/null @@ -1,2082 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<!-- Material used from: HTML 4.01 specs: http://www.w3.org/TR/html401/ --> -<html> -<head> - <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> - <title>Clang Language Extensions</title> - <link type="text/css" rel="stylesheet" href="../menu.css"> - <link type="text/css" rel="stylesheet" href="../content.css"> - <style type="text/css"> - td { - vertical-align: top; - } - th { background-color: #ffddaa; } - </style> -</head> -<body> - -<!--#include virtual="../menu.html.incl"--> - -<div id="content"> - -<h1>Clang Language Extensions</h1> - -<ul> -<li><a href="#intro">Introduction</a></li> -<li><a href="#feature_check">Feature Checking Macros</a></li> -<li><a href="#has_include">Include File Checking Macros</a></li> -<li><a href="#builtinmacros">Builtin Macros</a></li> -<li><a href="#vectors">Vectors and Extended Vectors</a></li> -<li><a href="#deprecated">Messages on <tt>deprecated</tt> and <tt>unavailable</tt> attributes</a></li> -<li><a href="#attributes-on-enumerators">Attributes on 
enumerators</a></li> -<li><a href="#user_specified_system_framework">'User-Specified' System Frameworks</a></li> -<li><a href="#availability">Availability attribute</a></li> -<li><a href="#checking_language_features">Checks for Standard Language Features</a> - <ul> - <li><a href="#cxx98">C++98</a> - <ul> - <li><a href="#cxx_exceptions">C++ exceptions</a></li> - <li><a href="#cxx_rtti">C++ RTTI</a></li> - </ul></li> - <li><a href="#cxx11">C++11</a> - <ul> - <li><a href="#cxx_access_control_sfinae">C++11 SFINAE includes access control</a></li> - <li><a href="#cxx_alias_templates">C++11 alias templates</a></li> - <li><a href="#cxx_alignas">C++11 alignment specifiers</a></li> - <li><a href="#cxx_attributes">C++11 attributes</a></li> - <li><a href="#cxx_constexpr">C++11 generalized constant expressions</a></li> - <li><a href="#cxx_decltype">C++11 <tt>decltype()</tt></a></li> - <li><a href="#cxx_default_function_template_args">C++11 default template arguments in function templates</a></li> - <li><a href="#cxx_defaulted_functions">C++11 defaulted functions</a></li> - <li><a href="#cxx_delegating_constructor">C++11 delegating constructors</a></li> - <li><a href="#cxx_deleted_functions">C++11 deleted functions</a></li> - <li><a href="#cxx_explicit_conversions">C++11 explicit conversion functions</a></li> - <li><a href="#cxx_generalized_initializers">C++11 generalized initializers</a></li> - <li><a href="#cxx_implicit_moves">C++11 implicit move constructors/assignment operators</a></li> - <li><a href="#cxx_inheriting_constructors">C++11 inheriting constructors</a></li> - <li><a href="#cxx_inline_namespaces">C++11 inline namespaces</a></li> - <li><a href="#cxx_lambdas">C++11 lambdas</a></li> - <li><a href="#cxx_local_type_template_args">C++11 local and unnamed types as template arguments</a></li> - <li><a href="#cxx_noexcept">C++11 noexcept specification</a></li> - <li><a href="#cxx_nonstatic_member_init">C++11 in-class non-static data member initialization</a></li> - <li><a 
href="#cxx_nullptr">C++11 nullptr</a></li> - <li><a href="#cxx_override_control">C++11 override control</a></li> - <li><a href="#cxx_range_for">C++11 range-based for loop</a></li> - <li><a href="#cxx_raw_string_literals">C++11 raw string literals</a></li> - <li><a href="#cxx_rvalue_references">C++11 rvalue references</a></li> - <li><a href="#cxx_reference_qualified_functions">C++11 reference-qualified functions</a></li> - <li><a href="#cxx_static_assert">C++11 <tt>static_assert()</tt></a></li> - <li><a href="#cxx_auto_type">C++11 type inference</a></li> - <li><a href="#cxx_strong_enums">C++11 strongly-typed enumerations</a></li> - <li><a href="#cxx_trailing_return">C++11 trailing return type</a></li> - <li><a href="#cxx_unicode_literals">C++11 Unicode string literals</a></li> - <li><a href="#cxx_unrestricted_unions">C++11 unrestricted unions</a></li> - <li><a href="#cxx_user_literals">C++11 user-defined literals</a></li> - <li><a href="#cxx_variadic_templates">C++11 variadic templates</a></li> - </ul></li> - <li><a href="#c11">C11</a> - <ul> - <li><a href="#c_alignas">C11 alignment specifiers</a></li> - <li><a href="#c_atomic">C11 atomic operations</a></li> - <li><a href="#c_generic_selections">C11 generic selections</a></li> - <li><a href="#c_static_assert">C11 <tt>_Static_assert()</tt></a></li> - </ul></li> -</ul></li> -<li><a href="#checking_type_traits">Checks for Type Traits</a></li> -<li><a href="#blocks">Blocks</a></li> -<li><a href="#objc_features">Objective-C Features</a> - <ul> - <li><a href="#objc_instancetype">Related result types</a></li> - <li><a href="#objc_arc">Automatic reference counting</a></li> - <li><a href="#objc_fixed_enum">Enumerations with a fixed underlying type</a></li> - <li><a href="#objc_lambdas">Interoperability with C++11 lambdas</a></li> - <li><a href="#objc_object_literals_subscripting">Object Literals and Subscripting</a></li> - </ul> -</li> -<li><a href="#overloading-in-c">Function Overloading in C</a></li> -<li><a 
href="#complex-list-init">Initializer lists for complex numbers in C</a></li> -<li><a href="#builtins">Builtin Functions</a> - <ul> - <li><a href="#__builtin_readcyclecounter">__builtin_readcyclecounter</a></li> - <li><a href="#__builtin_shufflevector">__builtin_shufflevector</a></li> - <li><a href="#__builtin_unreachable">__builtin_unreachable</a></li> - <li><a href="#__sync_swap">__sync_swap</a></li> - </ul> -</li> -<li><a href="#non-standard-attributes">Non-standard C++11 Attributes</a> -<ul> - <li><a href="#clang__fallthrough">The <tt>clang::fallthrough</tt> attribute</a></li> -</ul> -</li> -<li><a href="#targetspecific">Target-Specific Extensions</a> - <ul> - <li><a href="#x86-specific">X86/X86-64 Language Extensions</a></li> - </ul> -</li> -<li><a href="#analyzerspecific">Static Analysis-Specific Extensions</a></li> -<li><a href="#dynamicanalyzerspecific">Dynamic Analysis-Specific Extensions</a> - <ul> - <li><a href="#address_sanitizer">AddressSanitizer</a></li> - </ul> -</li> -<li><a href="#threadsafety">Thread Safety Annotation Checking</a> - <ul> - <li><a href="#ts_noanal"><tt>no_thread_safety_analysis</tt></a></li> - <li><a href="#ts_lockable"><tt>lockable</tt></a></li> - <li><a href="#ts_scopedlockable"><tt>scoped_lockable</tt></a></li> - <li><a href="#ts_guardedvar"><tt>guarded_var</tt></a></li> - <li><a href="#ts_ptguardedvar"><tt>pt_guarded_var</tt></a></li> - <li><a href="#ts_guardedby"><tt>guarded_by(l)</tt></a></li> - <li><a href="#ts_ptguardedby"><tt>pt_guarded_by(l)</tt></a></li> - <li><a href="#ts_acquiredbefore"><tt>acquired_before(...)</tt></a></li> - <li><a href="#ts_acquiredafter"><tt>acquired_after(...)</tt></a></li> - <li><a href="#ts_elf"><tt>exclusive_lock_function(...)</tt></a></li> - <li><a href="#ts_slf"><tt>shared_lock_function(...)</tt></a></li> - <li><a href="#ts_etf"><tt>exclusive_trylock_function(...)</tt></a></li> - <li><a href="#ts_stf"><tt>shared_trylock_function(...)</tt></a></li> - <li><a 
href="#ts_uf"><tt>unlock_function(...)</tt></a></li> - <li><a href="#ts_lr"><tt>lock_returned(l)</tt></a></li> - <li><a href="#ts_le"><tt>locks_excluded(...)</tt></a></li> - <li><a href="#ts_elr"><tt>exclusive_locks_required(...)</tt></a></li> - <li><a href="#ts_slr"><tt>shared_locks_required(...)</tt></a></li> - </ul> -</li> -<li><a href="#type_safety">Type Safety Checking</a> - <ul> - <li><a href="#argument_with_type_tag"><tt>argument_with_type_tag(...)</tt></a></li> - <li><a href="#pointer_with_type_tag"><tt>pointer_with_type_tag(...)</tt></a></li> - <li><a href="#type_tag_for_datatype"><tt>type_tag_for_datatype(...)</tt></a></li> - </ul> -</li> -</ul> - -<!-- ======================================================================= --> -<h2 id="intro">Introduction</h2> -<!-- ======================================================================= --> - -<p>This document describes the language extensions provided by Clang. In -addition to the language extensions listed here, Clang aims to support a broad -range of GCC extensions. Please see the <a -href="http://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html">GCC manual</a> for -more information on these extensions.</p> - -<!-- ======================================================================= --> -<h2 id="feature_check">Feature Checking Macros</h2> -<!-- ======================================================================= --> - -<p>Language extensions can be very useful, but only if you know you can depend -on them. In order to allow fine-grain features checks, we support three builtin -function-like macros. 
This allows you to directly test for a feature in your -code without having to resort to something like autoconf or fragile "compiler -version checks".</p> - -<!-- ======================================================================= --> -<h3><a name="__has_builtin">__has_builtin</a></h3> -<!-- ======================================================================= --> - -<p>This function-like macro takes a single identifier argument that is the name -of a builtin function. It evaluates to 1 if the builtin is supported or 0 if -not. It can be used like this:</p> - -<blockquote> -<pre> -#ifndef __has_builtin // Optional of course. - #define __has_builtin(x) 0 // Compatibility with non-clang compilers. -#endif - -... -#if __has_builtin(__builtin_trap) - __builtin_trap(); -#else - abort(); -#endif -... -</pre> -</blockquote> - - -<!-- ======================================================================= --> -<h3><a name="__has_feature_extension"> __has_feature and __has_extension</a></h3> -<!-- ======================================================================= --> - -<p>These function-like macros take a single identifier argument that is the -name of a feature. <code>__has_feature</code> evaluates to 1 if the feature -is both supported by Clang and standardized in the current language standard -or 0 if not (but see <a href="#has_feature_back_compat">below</a>), while -<code>__has_extension</code> evaluates to 1 if the feature is supported by -Clang in the current language (either as a language extension or a standard -language feature) or 0 if not. They can be used like this:</p> - -<blockquote> -<pre> -#ifndef __has_feature // Optional of course. - #define __has_feature(x) 0 // Compatibility with non-clang compilers. -#endif -#ifndef __has_extension - #define __has_extension __has_feature // Compatibility with pre-3.0 compilers. -#endif - -... 
-#if __has_feature(cxx_rvalue_references) -// This code will only be compiled with the -std=c++11 and -std=gnu++11 -// options, because rvalue references are only standardized in C++11. -#endif - -#if __has_extension(cxx_rvalue_references) -// This code will be compiled with the -std=c++11, -std=gnu++11, -std=c++98 -// and -std=gnu++98 options, because rvalue references are supported as a -// language extension in C++98. -#endif -</pre> -</blockquote> - -<p id="has_feature_back_compat">For backwards compatibility reasons, -<code>__has_feature</code> can also be used to test for support for -non-standardized features, i.e. features not prefixed <code>c_</code>, -<code>cxx_</code> or <code>objc_</code>.</p> - -<p id="has_feature_for_non_language_features"> -Another use of <code>__has_feature</code> is to check for compiler features -not related to the language standard, such as e.g. -<a href="AddressSanitizer.html">AddressSanitizer</a>. - -<p>If the <code>-pedantic-errors</code> option is given, -<code>__has_extension</code> is equivalent to <code>__has_feature</code>.</p> - -<p>The feature tag is described along with the language feature below.</p> - -<p>The feature name or extension name can also be specified with a preceding and -following <code>__</code> (double underscore) to avoid interference from a macro -with the same name. For instance, <code>__cxx_rvalue_references__</code> can be -used instead of <code>cxx_rvalue_references</code>.</p> - -<!-- ======================================================================= --> -<h3><a name="__has_attribute">__has_attribute</a></h3> -<!-- ======================================================================= --> - -<p>This function-like macro takes a single identifier argument that is the name -of an attribute. It evaluates to 1 if the attribute is supported or 0 if not. It -can be used like this:</p> - -<blockquote> -<pre> -#ifndef __has_attribute // Optional of course. 
- #define __has_attribute(x) 0 // Compatibility with non-clang compilers. -#endif - -... -#if __has_attribute(always_inline) -#define ALWAYS_INLINE __attribute__((always_inline)) -#else -#define ALWAYS_INLINE -#endif -... -</pre> -</blockquote> - -<p>The attribute name can also be specified with a preceding and -following <code>__</code> (double underscore) to avoid interference from a macro -with the same name. For instance, <code>__always_inline__</code> can be used -instead of <code>always_inline</code>.</p> - -<!-- ======================================================================= --> -<h2 id="has_include">Include File Checking Macros</h2> -<!-- ======================================================================= --> - -<p>Not all development systems have the same include files. -The <a href="#__has_include">__has_include</a> and -<a href="#__has_include_next">__has_include_next</a> macros allow you to -check for the existence of an include file before doing -a possibly failing #include directive.</p> - -<!-- ======================================================================= --> -<h3><a name="__has_include">__has_include</a></h3> -<!-- ======================================================================= --> - -<p>This function-like macro takes a single file name string argument that -is the name of an include file. It evaluates to 1 if the file can -be found using the include paths, or 0 otherwise:</p> - -<blockquote> -<pre> -// Note the two possible file name string formats. -#if __has_include("myinclude.h") && __has_include(&lt;stdint.h&gt;) -# include "myinclude.h" -#endif - -// To avoid problem with non-clang compilers not having this macro. 
-#if defined(__has_include) && __has_include("myinclude.h") -# include "myinclude.h" -#endif -</pre> -</blockquote> - -<p>To test for this feature, use #if defined(__has_include).</p> - -<!-- ======================================================================= --> -<h3><a name="__has_include_next">__has_include_next</a></h3> -<!-- ======================================================================= --> - -<p>This function-like macro takes a single file name string argument that -is the name of an include file. It is like __has_include except that it -looks for the second instance of the given file found in the include -paths. It evaluates to 1 if the second instance of the file can -be found using the include paths, or 0 otherwise:</p> - -<blockquote> -<pre> -// Note the two possible file name string formats. -#if __has_include_next("myinclude.h") && __has_include_next(<stdint.h>) -# include_next "myinclude.h" -#endif - -// To avoid problem with non-clang compilers not having this macro. -#if defined(__has_include_next) && __has_include_next("myinclude.h") -# include_next "myinclude.h" -#endif -</pre> -</blockquote> - -<p>Note that __has_include_next, like the GNU extension -#include_next directive, is intended for use in headers only, -and will issue a warning if used in the top-level compilation -file. A warning will also be issued if an absolute path -is used in the file argument.</p> - - -<!-- ======================================================================= --> -<h3><a name="__has_warning">__has_warning</a></h3> -<!-- ======================================================================= --> - -<p>This function-like macro takes a string literal that represents a command - line option for a warning and returns true if that is a valid warning - option.</p> - -<blockquote> -<pre> -#if __has_warning("-Wformat") -... 
-#endif -</pre> -</blockquote> - -<!-- ======================================================================= --> -<h2 id="builtinmacros">Builtin Macros</h2> -<!-- ======================================================================= --> - -<dl> - <dt><code>__BASE_FILE__</code></dt> - <dd>Defined to a string that contains the name of the main input - file passed to Clang.</dd> - - <dt><code>__COUNTER__</code></dt> - <dd>Defined to an integer value that starts at zero and is - incremented each time the <code>__COUNTER__</code> macro is - expanded.</dd> - - <dt><code>__INCLUDE_LEVEL__</code></dt> - <dd>Defined to an integral value that is the include depth of the - file currently being translated. For the main file, this value is - zero.</dd> - - <dt><code>__TIMESTAMP__</code></dt> - <dd>Defined to the date and time of the last modification of the - current source file.</dd> - - <dt><code>__clang__</code></dt> - <dd>Defined when compiling with Clang</dd> - - <dt><code>__clang_major__</code></dt> - <dd>Defined to the major marketing version number of Clang (e.g., the - 2 in 2.0.1). Note that marketing version numbers should not be used to - check for language features, as different vendors use different numbering - schemes. Instead, use the <a href="#feature_check">feature checking - macros</a>.</dd> - - <dt><code>__clang_minor__</code></dt> - <dd>Defined to the minor version number of Clang (e.g., the 0 in - 2.0.1). Note that marketing version numbers should not be used to - check for language features, as different vendors use different numbering - schemes. 
Instead, use the <a href="#feature_check">feature checking - macros</a>.</dd> - - <dt><code>__clang_patchlevel__</code></dt> - <dd>Defined to the marketing patch level of Clang (e.g., the 1 in 2.0.1).</dd> - - <dt><code>__clang_version__</code></dt> - <dd>Defined to a string that captures the Clang marketing version, including - the Subversion tag or revision number, e.g., "1.5 (trunk 102332)".</dd> -</dl> - -<!-- ======================================================================= --> -<h2 id="vectors">Vectors and Extended Vectors</h2> -<!-- ======================================================================= --> - -<p>Supports the GCC, OpenCL, AltiVec and NEON vector extensions.</p> - -<p>OpenCL vector types are created using the <tt>ext_vector_type</tt> attribute. It -supports the <tt>V.xyzw</tt> syntax and other tidbits as seen in OpenCL. An -example is:</p> - -<blockquote> -<pre> -typedef float float4 <b>__attribute__((ext_vector_type(4)))</b>; -typedef float float2 <b>__attribute__((ext_vector_type(2)))</b>; - -float4 foo(float2 a, float2 b) { - float4 c; - c.xz = a; - c.yw = b; - return c; -} -</pre> -</blockquote> - -<p>Query for this feature with -<tt>__has_extension(attribute_ext_vector_type)</tt>.</p> - -<p>Giving the <tt>-faltivec</tt> option to clang enables support for AltiVec vector -syntax and functions. For example:</p> - -<blockquote> -<pre> -vector float foo(vector int a) { - vector int b; - b = vec_add(a, a) + a; - return (vector float)b; -} -</pre> -</blockquote> - -<p>NEON vector types are created using <tt>neon_vector_type</tt> and -<tt>neon_polyvector_type</tt> attributes. 
For example:</p> - -<blockquote> -<pre> -typedef <b>__attribute__((neon_vector_type(8)))</b> int8_t int8x8_t; -typedef <b>__attribute__((neon_polyvector_type(16)))</b> poly8_t poly8x16_t; - -int8x8_t foo(int8x8_t a) { - int8x8_t v; - v = a; - return v; -} -</pre> -</blockquote> - -<!-- ======================================================================= --> -<h3><a name="vector_literals">Vector Literals</a></h3> -<!-- ======================================================================= --> - -<p>Vector literals can be used to create vectors from a set of scalars, or -vectors. Either parentheses or braces form can be used. In the parentheses form -the number of literal values specified must be one, i.e. referring to a scalar -value, or must match the size of the vector type being created. If a single -scalar literal value is specified, the scalar literal value will be replicated -to all the components of the vector type. In the braces form any number of -literals can be specified. For example:</p> - -<blockquote> -<pre> -typedef int v4si __attribute__((__vector_size__(16))); -typedef float float4 __attribute__((ext_vector_type(4))); -typedef float float2 __attribute__((ext_vector_type(2))); - -v4si vsi = (v4si){1, 2, 3, 4}; -float4 vf = (float4)(1.0f, 2.0f, 3.0f, 4.0f); -vector int vi1 = (vector int)(1); // vi1 will be (1, 1, 1, 1). -vector int vi2 = (vector int){1}; // vi2 will be (1, 0, 0, 0). -vector int vi3 = (vector int)(1, 2); // error -vector int vi4 = (vector int){1, 2}; // vi4 will be (1, 2, 0, 0). -vector int vi5 = (vector int)(1, 2, 3, 4); -float4 vf = (float4)((float2)(1.0f, 2.0f), (float2)(3.0f, 4.0f)); -</pre> -</blockquote> - -<!-- ======================================================================= --> -<h3><a name="vector_operations">Vector Operations</a></h3> -<!-- ======================================================================= --> - -<p>The table below shows the support for each operation by vector extension. 
-A dash indicates that an operation is not accepted according to a corresponding -specification.</p> - -<table width="500" border="1" cellspacing="0"> - <tr> - <th>Operator</th> - <th>OpenCL</th> - <th>AltiVec</th> - <th>GCC</th> - <th>NEON</th> - </tr> - <tr> - <td>[]</td> - <td align="center">yes</td> - <td align="center">yes</td> - <td align="center">yes</td> - <td align="center">-</td> - </tr> - <tr> - <td>unary operators +, -</td> - <td align="center">yes</td> - <td align="center">yes</td> - <td align="center">yes</td> - <td align="center">-</td> - </tr> - <tr> - <td>++, --</td> - <td align="center">yes</td> - <td align="center">yes</td> - <td align="center">-</td> - <td align="center">-</td> - </tr> - <tr> - <td>+, -, *, /, %</td> - <td align="center">yes</td> - <td align="center">yes</td> - <td align="center">yes</td> - <td align="center">-</td> - </tr> - <tr> - <td>bitwise operators &, |, ^, ~</td> - <td align="center">yes</td> - <td align="center">yes</td> - <td align="center">yes</td> - <td align="center">-</td> - </tr> - <tr> - <td>>>, <<</td> - <td align="center">yes</td> - <td align="center">yes</td> - <td align="center">yes</td> - <td align="center">-</td> - </tr> - <tr> - <td>!, &&,||</td> - <td align="center">no</td> - <td align="center">-</td> - <td align="center">-</td> - <td align="center">-</td> - </tr> - <tr> - <td>==,!=, >, <, >=, <=</td> - <td align="center">yes</td> - <td align="center">yes</td> - <td align="center">-</td> - <td align="center">-</td> - </tr> - <tr> - <td>=</td> - <td align="center">yes</td> - <td align="center">yes</td> - <td align="center">yes</td> - <td align="center">yes</td> - </tr> - <tr> - <td>:?</td> - <td align="center">yes</td> - <td align="center">-</td> - <td align="center">-</td> - <td align="center">-</td> - </tr> - <tr> - <td>sizeof</td> - <td align="center">yes</td> - <td align="center">yes</td> - <td align="center">yes</td> - <td align="center">yes</td> - </tr> -</table> - -<p>See also <a 
href="#__builtin_shufflevector">__builtin_shufflevector</a>.</p> - -<!-- ======================================================================= --> -<h2 id="deprecated">Messages on <tt>deprecated</tt> and <tt>unavailable</tt> Attributes</h2> -<!-- ======================================================================= --> - -<p>An optional string message can be added to the <tt>deprecated</tt> -and <tt>unavailable</tt> attributes. For example:</p> - -<blockquote> -<pre>void explode(void) __attribute__((deprecated("extremely unsafe, use 'combust' instead!!!")));</pre> -</blockquote> - -<p>If the deprecated or unavailable declaration is used, the message -will be incorporated into the appropriate diagnostic:</p> - -<blockquote> -<pre>harmless.c:4:3: warning: 'explode' is deprecated: extremely unsafe, use 'combust' instead!!! - [-Wdeprecated-declarations] - explode(); - ^</pre> -</blockquote> - -<p>Query for this feature -with <tt>__has_extension(attribute_deprecated_with_message)</tt> -and <tt>__has_extension(attribute_unavailable_with_message)</tt>.</p> - -<!-- ======================================================================= --> -<h2 id="attributes-on-enumerators">Attributes on Enumerators</h2> -<!-- ======================================================================= --> - -<p>Clang allows attributes to be written on individual enumerators. -This allows enumerators to be deprecated, made unavailable, etc. 
The -attribute must appear after the enumerator name and before any -initializer, like so:</p> - -<blockquote> -<pre>enum OperationMode { - OM_Invalid, - OM_Normal, - OM_Terrified __attribute__((deprecated)), - OM_AbortOnError __attribute__((deprecated)) = 4 -};</pre> -</blockquote> - -<p>Attributes on the <tt>enum</tt> declaration do not apply to -individual enumerators.</p> - -<p>Query for this feature with <tt>__has_extension(enumerator_attributes)</tt>.</p> - -<!-- ======================================================================= --> -<h2 id="user_specified_system_framework">'User-Specified' System Frameworks</h2> -<!-- ======================================================================= --> - -<p>Clang provides a mechanism by which frameworks can be built in such a way -that they will always be treated as being 'system frameworks', even if they are -not present in a system framework directory. This can be useful to system -framework developers who want to be able to test building other applications -with development builds of their framework, including the manner in which the -compiler changes warning behavior for system headers.</p> - -<p>Framework developers can opt-in to this mechanism by creating a -'.system_framework' file at the top-level of their framework. That is, the -framework should have contents like:</p> - -<pre> - .../TestFramework.framework - .../TestFramework.framework/.system_framework - .../TestFramework.framework/Headers - .../TestFramework.framework/Headers/TestFramework.h - ... -</pre> - -<p>Clang will treat the presence of this file as an indicator that the framework -should be treated as a system framework, regardless of how it was found in the -framework search path. 
For consistency, we recommend that such files never be -included in installed versions of the framework.</p> - -<!-- ======================================================================= --> -<h2 id="availability">Availability attribute</h2> -<!-- ======================================================================= --> - -<p>Clang introduces the <code>availability</code> attribute, which can -be placed on declarations to describe the lifecycle of that -declaration relative to operating system versions. Consider the function declaration for a hypothetical function <code>f</code>:</p> - -<pre> -void f(void) __attribute__((availability(macosx,introduced=10.4,deprecated=10.6,obsoleted=10.7))); -</pre> - -<p>The availability attribute states that <code>f</code> was introduced in Mac OS X 10.4, deprecated in Mac OS X 10.6, and obsoleted in Mac OS X 10.7. This information is used by Clang to determine when it is safe to use <code>f</code>: for example, if Clang is instructed to compile code for Mac OS X 10.5, a call to <code>f()</code> succeeds. If Clang is instructed to compile code for Mac OS X 10.6, the call succeeds but Clang emits a warning specifying that the function is deprecated. Finally, if Clang is instructed to compile code for Mac OS X 10.7, the call fails because <code>f()</code> is no longer available.</p> - -<p>The availability attribute is a comma-separated list starting with the platform name and then including clauses specifying important milestones in the declaration's lifetime (in any order) along with additional information. 
Those clauses can be:</p> - -<dl> - <dt>introduced=<i>version</i></dt> - <dd>The first version in which this declaration was introduced.</dd> - - <dt>deprecated=<i>version</i></dt> - <dd>The first version in which this declaration was deprecated, meaning that users should migrate away from this API.</dd> - - <dt>obsoleted=<i>version</i></dt> - <dd>The first version in which this declaration was obsoleted, meaning that it was removed completely and can no longer be used.</dd> - - <dt>unavailable</dt> - <dd>This declaration is never available on this platform.</dd> - - <dt>message=<i>string-literal</i></dt> - <dd>Additional message text that Clang will provide when emitting a warning or error about use of a deprecated or obsoleted declaration. Useful to direct users to replacement APIs.</dd> -</dl> - -<p>Multiple availability attributes can be placed on a declaration, which may correspond to different platforms. Only the availability attribute with the platform corresponding to the target platform will be used; any others will be ignored. If no availability attribute specifies availability for the current target platform, the availability attributes are ignored. Supported platforms are:</p> - -<dl> - <dt>ios</dt> - <dd>Apple's iOS operating system. The minimum deployment target is specified by the <code>-mios-version-min=<i>version</i></code> or <code>-miphoneos-version-min=<i>version</i></code> command-line arguments.</dd> - - <dt>macosx</dt> - <dd>Apple's Mac OS X operating system. The minimum deployment target is specified by the <code>-mmacosx-version-min=<i>version</i></code> command-line argument.</dd> -</dl> - -<p>A declaration can be used even when deploying back to a platform -version prior to when the declaration was introduced. 
When this -happens, the declaration is <a - href="https://developer.apple.com/library/mac/#documentation/MacOSX/Conceptual/BPFrameworks/Concepts/WeakLinking.html">weakly -linked</a>, as if the <code>weak_import</code> attribute were added to the declaration. A weakly-linked declaration may or may not be present at run-time, and a program can determine whether the declaration is present by checking whether the address of that declaration is non-NULL.</p> - -<!-- ======================================================================= --> -<h2 id="checking_language_features">Checks for Standard Language Features</h2> -<!-- ======================================================================= --> - -<p>The <tt>__has_feature</tt> macro can be used to query if certain standard -language features are enabled. The <tt>__has_extension</tt> macro can be used -to query if language features are available as an extension when compiling for -a standard which does not provide them. The features which can be tested are -listed here.</p> - -<h3 id="cxx98">C++98</h3> - -<p>The features listed below are part of the C++98 standard. These features are -enabled by default when compiling C++ code.</p> - -<h4 id="cxx_exceptions">C++ exceptions</h4> - -<p>Use <tt>__has_feature(cxx_exceptions)</tt> to determine if C++ exceptions have been enabled. For -example, compiling code with <tt>-fno-exceptions</tt> disables C++ exceptions.</p> - -<h4 id="cxx_rtti">C++ RTTI</h4> - -<p>Use <tt>__has_feature(cxx_rtti)</tt> to determine if C++ RTTI has been enabled. For example, -compiling code with <tt>-fno-rtti</tt> disables the use of RTTI.</p> - -<h3 id="cxx11">C++11</h3> - -<p>The features listed below are part of the C++11 standard. 
As a result, all -these features are enabled with the <tt>-std=c++11</tt> or <tt>-std=gnu++11</tt> -option when compiling C++ code.</p> - -<h4 id="cxx_access_control_sfinae">C++11 SFINAE includes access control</h4> - -<p>Use <tt>__has_feature(cxx_access_control_sfinae)</tt> or <tt>__has_extension(cxx_access_control_sfinae)</tt> to determine whether access-control errors (e.g., calling a private constructor) are considered to be template argument deduction errors (aka SFINAE errors), per <a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1170">C++ DR1170</a>.</p> - -<h4 id="cxx_alias_templates">C++11 alias templates</h4> - -<p>Use <tt>__has_feature(cxx_alias_templates)</tt> or -<tt>__has_extension(cxx_alias_templates)</tt> to determine if support for -C++11's alias declarations and alias templates is enabled.</p> - -<h4 id="cxx_alignas">C++11 alignment specifiers</h4> - -<p>Use <tt>__has_feature(cxx_alignas)</tt> or -<tt>__has_extension(cxx_alignas)</tt> to determine if support for alignment -specifiers using <tt>alignas</tt> is enabled.</p> - -<h4 id="cxx_attributes">C++11 attributes</h4> - -<p>Use <tt>__has_feature(cxx_attributes)</tt> or -<tt>__has_extension(cxx_attributes)</tt> to determine if support for attribute -parsing with C++11's square bracket notation is enabled.</p> - -<h4 id="cxx_constexpr">C++11 generalized constant expressions</h4> - -<p>Use <tt>__has_feature(cxx_constexpr)</tt> to determine if support -for generalized constant expressions (e.g., <tt>constexpr</tt>) is -enabled.</p> - -<h4 id="cxx_decltype">C++11 <tt>decltype()</tt></h4> - -<p>Use <tt>__has_feature(cxx_decltype)</tt> or -<tt>__has_extension(cxx_decltype)</tt> to determine if support for the -<tt>decltype()</tt> specifier is enabled. C++11's <tt>decltype</tt> -does not require type-completeness of a function call expression. 
-Use <tt>__has_feature(cxx_decltype_incomplete_return_types)</tt> -or <tt>__has_extension(cxx_decltype_incomplete_return_types)</tt> -to determine if support for this feature is enabled.</p> - -<h4 id="cxx_default_function_template_args">C++11 default template arguments in function templates</h4> - -<p>Use <tt>__has_feature(cxx_default_function_template_args)</tt> or -<tt>__has_extension(cxx_default_function_template_args)</tt> to determine -if support for default template arguments in function templates is enabled.</p> - -<h4 id="cxx_defaulted_functions">C++11 <tt>default</tt>ed functions</h4> - -<p>Use <tt>__has_feature(cxx_defaulted_functions)</tt> or -<tt>__has_extension(cxx_defaulted_functions)</tt> to determine if support for -defaulted function definitions (with <tt>= default</tt>) is enabled.</p> - -<h4 id="cxx_delegating_constructors">C++11 delegating constructors</h4> - -<p>Use <tt>__has_feature(cxx_delegating_constructors)</tt> to determine if -support for delegating constructors is enabled.</p> - -<h4 id="cxx_deleted_functions">C++11 <tt>delete</tt>d functions</h4> - -<p>Use <tt>__has_feature(cxx_deleted_functions)</tt> or -<tt>__has_extension(cxx_deleted_functions)</tt> to determine if support for -deleted function definitions (with <tt>= delete</tt>) is enabled.</p> - -<h4 id="cxx_explicit_conversions">C++11 explicit conversion functions</h4> -<p>Use <tt>__has_feature(cxx_explicit_conversions)</tt> to determine if support for <tt>explicit</tt> conversion functions is enabled.</p> - -<h4 id="cxx_generalized_initializers">C++11 generalized initializers</h4> - -<p>Use <tt>__has_feature(cxx_generalized_initializers)</tt> to determine if -support for generalized initializers (using braced lists and -<tt>std::initializer_list</tt>) is enabled.</p> - -<h4 id="cxx_implicit_moves">C++11 implicit move constructors/assignment operators</h4> - -<p>Use <tt>__has_feature(cxx_implicit_moves)</tt> to determine if Clang will -implicitly generate move constructors and 
move assignment operators where needed.</p> - -<h4 id="cxx_inheriting_constructors">C++11 inheriting constructors</h4> - -<p>Use <tt>__has_feature(cxx_inheriting_constructors)</tt> to determine if support for inheriting constructors is enabled. Clang does not currently implement this feature.</p> - -<h4 id="cxx_inline_namespaces">C++11 inline namespaces</h4> - -<p>Use <tt>__has_feature(cxx_inline_namespaces)</tt> or -<tt>__has_extension(cxx_inline_namespaces)</tt> to determine if support for -inline namespaces is enabled.</p> - -<h4 id="cxx_lambdas">C++11 lambdas</h4> - -<p>Use <tt>__has_feature(cxx_lambdas)</tt> or -<tt>__has_extension(cxx_lambdas)</tt> to determine if support for lambdas -is enabled. </p> - -<h4 id="cxx_local_type_template_args">C++11 local and unnamed types as template arguments</h4> - -<p>Use <tt>__has_feature(cxx_local_type_template_args)</tt> or -<tt>__has_extension(cxx_local_type_template_args)</tt> to determine if -support for local and unnamed types as template arguments is enabled.</p> - -<h4 id="cxx_noexcept">C++11 noexcept</h4> - -<p>Use <tt>__has_feature(cxx_noexcept)</tt> or -<tt>__has_extension(cxx_noexcept)</tt> to determine if support for noexcept -exception specifications is enabled.</p> - -<h4 id="cxx_nonstatic_member_init">C++11 in-class non-static data member initialization</h4> - -<p>Use <tt>__has_feature(cxx_nonstatic_member_init)</tt> to determine whether in-class initialization of non-static data members is enabled.</p> - -<h4 id="cxx_nullptr">C++11 <tt>nullptr</tt></h4> - -<p>Use <tt>__has_feature(cxx_nullptr)</tt> or -<tt>__has_extension(cxx_nullptr)</tt> to determine if support for -<tt>nullptr</tt> is enabled.</p> - -<h4 id="cxx_override_control">C++11 <tt>override control</tt></h4> - -<p>Use <tt>__has_feature(cxx_override_control)</tt> or -<tt>__has_extension(cxx_override_control)</tt> to determine if support for -the override control keywords is enabled.</p> - -<h4 id="cxx_reference_qualified_functions">C++11 
reference-qualified functions</h4> -<p>Use <tt>__has_feature(cxx_reference_qualified_functions)</tt> or -<tt>__has_extension(cxx_reference_qualified_functions)</tt> to determine -if support for reference-qualified functions (e.g., member functions with -<code>&</code> or <code>&&</code> applied to <code>*this</code>) -is enabled.</p> - -<h4 id="cxx_range_for">C++11 range-based <tt>for</tt> loop</h4> - -<p>Use <tt>__has_feature(cxx_range_for)</tt> or -<tt>__has_extension(cxx_range_for)</tt> to determine if support for the -range-based for loop is enabled. </p> - -<h4 id="cxx_raw_string_literals">C++11 raw string literals</h4> -<p>Use <tt>__has_feature(cxx_raw_string_literals)</tt> to determine if support -for raw string literals (e.g., <tt>R"x(foo\bar)x"</tt>) is enabled.</p> - -<h4 id="cxx_rvalue_references">C++11 rvalue references</h4> - -<p>Use <tt>__has_feature(cxx_rvalue_references)</tt> or -<tt>__has_extension(cxx_rvalue_references)</tt> to determine if support for -rvalue references is enabled. </p> - -<h4 id="cxx_static_assert">C++11 <tt>static_assert()</tt></h4> - -<p>Use <tt>__has_feature(cxx_static_assert)</tt> or -<tt>__has_extension(cxx_static_assert)</tt> to determine if support for -compile-time assertions using <tt>static_assert</tt> is enabled.</p> - -<h4 id="cxx_auto_type">C++11 type inference</h4> - -<p>Use <tt>__has_feature(cxx_auto_type)</tt> or -<tt>__has_extension(cxx_auto_type)</tt> to determine C++11 type inference is -supported using the <tt>auto</tt> specifier. 
If this is disabled, <tt>auto</tt> -will instead be a storage class specifier, as in C or C++98.</p> - -<h4 id="cxx_strong_enums">C++11 strongly typed enumerations</h4> - -<p>Use <tt>__has_feature(cxx_strong_enums)</tt> or -<tt>__has_extension(cxx_strong_enums)</tt> to determine if support for -strongly typed, scoped enumerations is enabled.</p> - -<h4 id="cxx_trailing_return">C++11 trailing return type</h4> - -<p>Use <tt>__has_feature(cxx_trailing_return)</tt> or -<tt>__has_extension(cxx_trailing_return)</tt> to determine if support for the -alternate function declaration syntax with trailing return type is enabled.</p> - -<h4 id="cxx_unicode_literals">C++11 Unicode string literals</h4> -<p>Use <tt>__has_feature(cxx_unicode_literals)</tt> to determine if -support for Unicode string literals is enabled.</p> - -<h4 id="cxx_unrestricted_unions">C++11 unrestricted unions</h4> - -<p>Use <tt>__has_feature(cxx_unrestricted_unions)</tt> to determine if support for unrestricted unions is enabled.</p> - -<h4 id="cxx_user_literals">C++11 user-defined literals</h4> - -<p>Use <tt>__has_feature(cxx_user_literals)</tt> to determine if support for user-defined literals is enabled.</p> - -<h4 id="cxx_variadic_templates">C++11 variadic templates</h4> - -<p>Use <tt>__has_feature(cxx_variadic_templates)</tt> or -<tt>__has_extension(cxx_variadic_templates)</tt> to determine if support -for variadic templates is enabled.</p> - -<h3 id="c11">C11</h3> - -<p>The features listed below are part of the C11 standard. As a result, all -these features are enabled with the <tt>-std=c11</tt> or <tt>-std=gnu11</tt> -option when compiling C code. 
Additionally, because these features are all -backward-compatible, they are available as extensions in all language modes.</p> - -<h4 id="c_alignas">C11 alignment specifiers</h4> - -<p>Use <tt>__has_feature(c_alignas)</tt> or <tt>__has_extension(c_alignas)</tt> -to determine if support for alignment specifiers using <tt>_Alignas</tt> -is enabled.</p> - -<h4 id="c_atomic">C11 atomic operations</h4> - -<p>Use <tt>__has_feature(c_atomic)</tt> or <tt>__has_extension(c_atomic)</tt> -to determine if support for atomic types using <tt>_Atomic</tt> is enabled. -Clang also provides <a href="#__c11_atomic">a set of builtins</a> which can be -used to implement the <tt>&lt;stdatomic.h&gt;</tt> operations on -<tt>_Atomic</tt> types.</p> - -<h4 id="c_generic_selections">C11 generic selections</h4> - -<p>Use <tt>__has_feature(c_generic_selections)</tt> or -<tt>__has_extension(c_generic_selections)</tt> to determine if support for -generic selections is enabled.</p> - -<p>As an extension, the C11 generic selection expression is available in all -languages supported by Clang. 
The syntax is the same as that given in the -C11 standard.</p> - -<p>In C, type compatibility is decided according to the rules given in the -appropriate standard, but in C++, which lacks the type compatibility rules -used in C, types are considered compatible only if they are equivalent.</p> - -<h4 id="c_static_assert">C11 <tt>_Static_assert()</tt></h4> - -<p>Use <tt>__has_feature(c_static_assert)</tt> or -<tt>__has_extension(c_static_assert)</tt> to determine if support for -compile-time assertions using <tt>_Static_assert</tt> is enabled.</p> - -<!-- ======================================================================= --> -<h2 id="checking_type_traits">Checks for Type Traits</h2> -<!-- ======================================================================= --> - -<p>Clang supports the <a href="http://gcc.gnu.org/onlinedocs/gcc/Type-Traits.html">GNU C++ type traits</a> and a subset of the <a href="http://msdn.microsoft.com/en-us/library/ms177194(v=VS.100).aspx">Microsoft Visual C++ Type traits</a>. For each supported type trait <code>__X</code>, <code>__has_extension(X)</code> indicates the presence of the type trait. 
For example: -<blockquote> -<pre> -#if __has_extension(is_convertible_to) -template<typename From, typename To> -struct is_convertible_to { - static const bool value = __is_convertible_to(From, To); -}; -#else -// Emulate type trait -#endif -</pre> -</blockquote> - -<p>The following type traits are supported by Clang:</p> -<ul> - <li><code>__has_nothrow_assign</code> (GNU, Microsoft)</li> - <li><code>__has_nothrow_copy</code> (GNU, Microsoft)</li> - <li><code>__has_nothrow_constructor</code> (GNU, Microsoft)</li> - <li><code>__has_trivial_assign</code> (GNU, Microsoft)</li> - <li><code>__has_trivial_copy</code> (GNU, Microsoft)</li> - <li><code>__has_trivial_constructor</code> (GNU, Microsoft)</li> - <li><code>__has_trivial_destructor</code> (GNU, Microsoft)</li> - <li><code>__has_virtual_destructor</code> (GNU, Microsoft)</li> - <li><code>__is_abstract</code> (GNU, Microsoft)</li> - <li><code>__is_base_of</code> (GNU, Microsoft)</li> - <li><code>__is_class</code> (GNU, Microsoft)</li> - <li><code>__is_convertible_to</code> (Microsoft)</li> - <li><code>__is_empty</code> (GNU, Microsoft)</li> - <li><code>__is_enum</code> (GNU, Microsoft)</li> - <li><code>__is_interface_class</code> (Microsoft)</li> - <li><code>__is_pod</code> (GNU, Microsoft)</li> - <li><code>__is_polymorphic</code> (GNU, Microsoft)</li> - <li><code>__is_union</code> (GNU, Microsoft)</li> - <li><code>__is_literal(type)</code>: Determines whether the given type is a literal type</li> - <li><code>__is_final</code>: Determines whether the given type is declared with a <code>final</code> class-virt-specifier.</li> - <li><code>__underlying_type(type)</code>: Retrieves the underlying type for a given <code>enum</code> type. 
This trait is required to implement the C++11 standard library.</li> - <li><code>__is_trivially_assignable(totype, fromtype)</code>: Determines whether a value of type <tt>totype</tt> can be assigned to from a value of type <tt>fromtype</tt> such that no non-trivial functions are called as part of that assignment. This trait is required to implement the C++11 standard library.</li> - <li><code>__is_trivially_constructible(type, argtypes...)</code>: Determines whether a value of type <tt>type</tt> can be direct-initialized with arguments of types <tt>argtypes...</tt> such that no non-trivial functions are called as part of that initialization. This trait is required to implement the C++11 standard library.</li> -</ul> - -<!-- ======================================================================= --> -<h2 id="blocks">Blocks</h2> -<!-- ======================================================================= --> - -<p>The syntax and high level language feature description is in <a -href="BlockLanguageSpec.txt">BlockLanguageSpec.txt</a>. Implementation and ABI -details for the clang implementation are in <a -href="Block-ABI-Apple.txt">Block-ABI-Apple.txt</a>.</p> - - -<p>Query for this feature with __has_extension(blocks).</p> - -<!-- ======================================================================= --> -<h2 id="objc_features">Objective-C Features</h2> -<!-- ======================================================================= --> - -<h3 id="objc_instancetype">Related result types</h3> - -<p>According to Cocoa conventions, Objective-C methods with certain names ("init", "alloc", etc.) always return objects that are an instance of the receiving class's type. Such methods are said to have a "related result type", meaning that a message send to one of these methods will have the same static type as an instance of the receiver class. 
For example, given the following classes:</p> - -<blockquote> -<pre> -@interface NSObject -+ (id)alloc; -- (id)init; -@end - -@interface NSArray : NSObject -@end -</pre> -</blockquote> - -<p>and this common initialization pattern</p> - -<blockquote> -<pre> -NSArray *array = [[NSArray alloc] init]; -</pre> -</blockquote> - -<p>the type of the expression <code>[NSArray alloc]</code> is -<code>NSArray*</code> because <code>alloc</code> implicitly has a -related result type. Similarly, the type of the expression -<code>[[NSArray alloc] init]</code> is <code>NSArray*</code>, since -<code>init</code> has a related result type and its receiver is known -to have the type <code>NSArray *</code>. If neither <code>alloc</code> nor <code>init</code> had a related result type, the expressions would have had type <code>id</code>, as declared in the method signature.</p> - -<p>A method with a related result type can be declared by using the -type <tt>instancetype</tt> as its result type. <tt>instancetype</tt> -is a contextual keyword that is only permitted in the result type of -an Objective-C method, e.g.</p> - -<pre> -@interface A -+ (<b>instancetype</b>)constructAnA; -@end -</pre> - -<p>The related result type can also be inferred for some methods. -To determine whether a method has an inferred related result type, the first -word in the camel-case selector (e.g., "init" in "initWithObjects") is -considered, and the method will have a related result type if its return -type is compatible with the type of its class and if</p> - -<ul> - - <li>the first word is "alloc" or "new", and the method is a class - method, or</li> - - <li>the first word is "autorelease", "init", "retain", or "self", - and the method is an instance method.</li> - -</ul> - -<p>If a method with a related result type is overridden by a subclass -method, the subclass method must also return a type that is compatible -with the subclass type. 
For example:</p> - -<blockquote> -<pre> -@interface NSString : NSObject -- (NSUnrelated *)init; // incorrect usage: NSUnrelated is not NSString or a superclass of NSString -@end -</pre> -</blockquote> - -<p>Related result types only affect the type of a message send or -property access via the given method. In all other respects, a method -with a related result type is treated the same way as a method that -returns <tt>id</tt>.</p> - -<p>Use <tt>__has_feature(objc_instancetype)</tt> to determine whether -the <tt>instancetype</tt> contextual keyword is available.</p> - -<!-- ======================================================================= --> -<h2 id="objc_arc">Automatic reference counting </h2> -<!-- ======================================================================= --> - -<p>Clang provides support for <a href="AutomaticReferenceCounting.html">automated reference counting</a> in Objective-C, which eliminates the need for manual retain/release/autorelease message sends. There are two feature macros associated with automatic reference counting: <code>__has_feature(objc_arc)</code> indicates the availability of automated reference counting in general, while <code>__has_feature(objc_arc_weak)</code> indicates that automated reference counting also includes support for <code>__weak</code> pointers to Objective-C objects.</p> - -<!-- ======================================================================= --> -<h2 id="objc_fixed_enum">Enumerations with a fixed underlying type</h2> -<!-- ======================================================================= --> - -<p>Clang provides support for C++11 enumerations with a fixed -underlying type within Objective-C.
For example, one can write an -enumeration type as:</p> - -<pre> -typedef enum : unsigned char { Red, Green, Blue } Color; -</pre> - -<p>This specifies that the underlying type, which is used to store the -enumeration value, is <tt>unsigned char</tt>.</p> - -<p>Use <tt>__has_feature(objc_fixed_enum)</tt> to determine whether -support for fixed underlying types is available in Objective-C.</p> - -<!-- ======================================================================= --> -<h2 id="objc_lambdas">Interoperability with C++11 lambdas</h2> -<!-- ======================================================================= --> - -<p>Clang provides interoperability between C++11 lambdas and -blocks-based APIs, by permitting a lambda to be implicitly converted -to a block pointer with the corresponding signature. For example, -consider an API such as <code>NSArray</code>'s array-sorting -method:</p> - -<pre> - (NSArray *)sortedArrayUsingComparator:(NSComparator)cmptr; </pre> - -<p><code>NSComparator</code> is simply a typedef for the block pointer -<code>NSComparisonResult (^)(id, id)</code>, and parameters of this -type are generally provided with block literals as arguments. 
However, -one can also use a C++11 lambda so long as it provides the same -signature (in this case, accepting two parameters of type -<code>id</code> and returning an <code>NSComparisonResult</code>):</p> - -<pre> - NSArray *array = @[@"string 1", @"string 21", @"string 12", @"String 11", - @"String 02"]; - const NSStringCompareOptions comparisonOptions - = NSCaseInsensitiveSearch | NSNumericSearch | - NSWidthInsensitiveSearch | NSForcedOrderingSearch; - NSLocale *currentLocale = [NSLocale currentLocale]; - NSArray *sorted - = [array sortedArrayUsingComparator:<b>[=](id s1, id s2) -> NSComparisonResult { - NSRange string1Range = NSMakeRange(0, [s1 length]); - return [s1 compare:s2 options:comparisonOptions - range:string1Range locale:currentLocale]; - }</b>]; - NSLog(@"sorted: %@", sorted); -</pre> - -<p>This code relies on an implicit conversion from the type of the -lambda expression (an unnamed, local class type called the <i>closure -type</i>) to the corresponding block pointer type. The conversion -itself is expressed by a conversion operator in that closure type -that produces a block pointer with the same signature as the lambda -itself, e.g.,</p> - -<pre> - operator NSComparisonResult (^)(id, id)() const; -</pre> - -<p>This conversion function returns a new block that simply forwards -the two parameters to the lambda object (which it captures by copy), -then returns the result. The returned block is first copied (with -<tt>Block_copy</tt>) and then autoreleased. 
As an optimization, if a -lambda expression is immediately converted to a block pointer (as in -the first example, above), then the block is not copied and -autoreleased: rather, it is given the same lifetime as a block literal -written at that point in the program, which avoids the overhead of -copying a block to the heap in the common case.</p> - -<p>The conversion from a lambda to a block pointer is only available -in Objective-C++, and not in C++ with blocks, due to its use of -Objective-C memory management (autorelease).</p> - -<!-- ======================================================================= --> -<h2 id="objc_object_literals_subscripting">Object Literals and Subscripting</h2> -<!-- ======================================================================= --> - -<p>Clang provides support for <a href="ObjectiveCLiterals.html">Object Literals -and Subscripting</a> in Objective-C, which simplifies common Objective-C -programming patterns, makes programs more concise, and improves the safety of -container creation. There are several feature macros associated with object -literals and subscripting: <code>__has_feature(objc_array_literals)</code> -tests the availability of array literals; -<code>__has_feature(objc_dictionary_literals)</code> tests the availability of -dictionary literals; <code>__has_feature(objc_subscripting)</code> tests the -availability of object subscripting.</p> - -<!-- ======================================================================= --> -<h2 id="objc_default_synthesize_properties">Objective-C Autosynthesis of Properties</h2> -<!-- ======================================================================= --> - -<p> Clang provides support for autosynthesis of declared properties. Using this -feature, clang provides default synthesis of those properties not declared @dynamic -and not having user provided backing getter and setter methods. 
-<code>__has_feature(objc_default_synthesize_properties)</code> checks for availability -of this feature in the version of clang being used.</p> - -<!-- ======================================================================= --> -<h2 id="overloading-in-c">Function Overloading in C</h2> -<!-- ======================================================================= --> - -<p>Clang provides support for C++ function overloading in C. Function -overloading in C is introduced using the <tt>overloadable</tt> attribute. For -example, one might provide several overloaded versions of a <tt>tgsin</tt> -function that invokes the appropriate standard function computing the sine of a -value with <tt>float</tt>, <tt>double</tt>, or <tt>long double</tt> -precision:</p> - -<blockquote> -<pre> -#include &lt;math.h&gt; -float <b>__attribute__((overloadable))</b> tgsin(float x) { return sinf(x); } -double <b>__attribute__((overloadable))</b> tgsin(double x) { return sin(x); } -long double <b>__attribute__((overloadable))</b> tgsin(long double x) { return sinl(x); } -</pre> -</blockquote> - -<p>Given these declarations, one can call <tt>tgsin</tt> with a -<tt>float</tt> value to receive a <tt>float</tt> result, with a -<tt>double</tt> to receive a <tt>double</tt> result, etc. Function -overloading in C follows the rules of C++ function overloading to pick -the best overload given the call arguments, with a few C-specific -semantics:</p> -<ul> - <li>Conversion from <tt>float</tt> or <tt>double</tt> to <tt>long - double</tt> is ranked as a floating-point promotion (per C99) rather - than as a floating-point conversion (as in C++).</li> - - <li>A conversion from a pointer of type <tt>T*</tt> to a pointer of type - <tt>U*</tt> is considered a pointer conversion (with conversion - rank) if <tt>T</tt> and <tt>U</tt> are compatible types.</li> - - <li>A conversion from type <tt>T</tt> to a value of type <tt>U</tt> - is permitted if <tt>T</tt> and <tt>U</tt> are compatible types.
This - conversion is given "conversion" rank.</li> -</ul> - -<p>The declaration of <tt>overloadable</tt> functions is restricted to -function declarations and definitions. Most importantly, if any -function with a given name is given the <tt>overloadable</tt> -attribute, then all function declarations and definitions with that -name (and in that scope) must have the <tt>overloadable</tt> -attribute. This rule even applies to redeclarations of functions whose original -declaration had the <tt>overloadable</tt> attribute, e.g.,</p> - -<blockquote> -<pre> -int f(int) __attribute__((overloadable)); -float f(float); <i>// error: declaration of "f" must have the "overloadable" attribute</i> - -int g(int) __attribute__((overloadable)); -int g(int) { } <i>// error: redeclaration of "g" must also have the "overloadable" attribute</i> -</pre> -</blockquote> - -<p>Functions marked <tt>overloadable</tt> must have -prototypes. Therefore, the following code is ill-formed:</p> - -<blockquote> -<pre> -int h() __attribute__((overloadable)); <i>// error: h does not have a prototype</i> -</pre> -</blockquote> - -<p>However, <tt>overloadable</tt> functions are allowed to use an -ellipsis even if there are no named parameters (as is permitted in C++). This feature is particularly useful when combined with the <tt>unavailable</tt> attribute:</p> - -<blockquote> -<pre> -void honeypot(...) __attribute__((overloadable, unavailable)); <i>// calling me is an error</i> -</pre> -</blockquote> - -<p>Functions declared with the <tt>overloadable</tt> attribute have -their names mangled according to the same rules as C++ function -names. For example, the three <tt>tgsin</tt> functions in our -motivating example get the mangled names <tt>_Z5tgsinf</tt>, -<tt>_Z5tgsind</tt>, and <tt>_Z5tgsine</tt>, respectively.
There are two -caveats to this use of name mangling:</p> - -<ul> - - <li>Future versions of Clang may change the name mangling of - functions overloaded in C, so you should not depend on a specific - mangling. To be completely safe, we strongly urge the use of - <tt>static inline</tt> with <tt>overloadable</tt> functions.</li> - - <li>The <tt>overloadable</tt> attribute has almost no meaning when - used in C++, because names will already be mangled and functions are - already overloadable. However, when an <tt>overloadable</tt> - function occurs within an <tt>extern "C"</tt> linkage specification, - its name <i>will</i> be mangled in the same way as it would in - C.</li> -</ul> - -<p>Query for this feature with __has_extension(attribute_overloadable).</p> - -<!-- ======================================================================= --> -<h2 id="complex-list-init">Initializer lists for complex numbers in C</h2> -<!-- ======================================================================= --> - -<p>clang supports an extension which allows the following in C:</p> - -<blockquote> -<pre> -#include &lt;math.h&gt; -#include &lt;complex.h&gt; -complex float x = { 1.0f, INFINITY }; // Init to (1, Inf) -</pre> -</blockquote> - -<p>This construct is useful because there is no way to separately -initialize the real and imaginary parts of a complex variable in -standard C, given that clang does not support <code>_Imaginary</code>. -(clang also supports the <code>__real__</code> and <code>__imag__</code> -extensions from gcc, which help in some cases, but are not usable in -static initializers.)
- -<p>Note that this extension does not allow eliding the braces; the -meaning of the following two lines is different:</p> - -<blockquote> -<pre> -complex float x[] = { { 1.0f, 1.0f } }; // [0] = (1, 1) -complex float x[] = { 1.0f, 1.0f }; // [0] = (1, 0), [1] = (1, 0) -</pre> -</blockquote> - -<p>This extension also works in C++ mode, as far as that goes, but does not - apply to the C++ <code>std::complex</code>. (In C++11, list - initialization allows the same syntax to be used with - <code>std::complex</code> with the same meaning.) - -<!-- ======================================================================= --> -<h2 id="builtins">Builtin Functions</h2> -<!-- ======================================================================= --> - -<p>Clang supports a number of builtin library functions with the same syntax as -GCC, including things like <tt>__builtin_nan</tt>, -<tt>__builtin_constant_p</tt>, <tt>__builtin_choose_expr</tt>, -<tt>__builtin_types_compatible_p</tt>, <tt>__sync_fetch_and_add</tt>, etc. In -addition to the GCC builtins, Clang supports a number of builtins that GCC does -not, which are listed here.</p> - -<p>Please note that Clang does not and will not support all of the GCC builtins -for vector operations. Instead of using builtins, you should use the functions -defined in target-specific header files like <tt><xmmintrin.h></tt>, which -define portable wrappers for these. 
Many of the Clang versions of these -functions are implemented directly in terms of <a href="#vectors">extended -vector support</a> instead of builtins, in order to reduce the number of -builtins that we need to implement.</p> - -<!-- ======================================================================= --> -<h3><a name="__builtin_readcyclecounter">__builtin_readcyclecounter</a></h3> -<!-- ======================================================================= --> - -<p><tt>__builtin_readcyclecounter</tt> is used to access the cycle counter -register (or a similar low-latency, high-accuracy clock) on those targets that -support it. -</p> - -<p><b>Syntax:</b></p> - -<pre> -__builtin_readcyclecounter() -</pre> - -<p><b>Example of Use:</b></p> - -<pre> -unsigned long long t0 = __builtin_readcyclecounter(); -do_something(); -unsigned long long t1 = __builtin_readcyclecounter(); -unsigned long long cycles_to_do_something = t1 - t0; // assuming no overflow -</pre> - -<p><b>Description:</b></p> - -<p>The __builtin_readcyclecounter() builtin returns the cycle counter value, -which may be either global or process/thread-specific depending on the target. -As the backing counters often overflow quickly (on the order of -seconds) this should only be used for timing small intervals. When not -supported by the target, the return value is always zero. This builtin -takes no arguments and produces an unsigned long long result. -</p> - -<p>Query for this feature with __has_builtin(__builtin_readcyclecounter).</p> - -<!-- ======================================================================= --> -<h3><a name="__builtin_shufflevector">__builtin_shufflevector</a></h3> -<!-- ======================================================================= --> - -<p><tt>__builtin_shufflevector</tt> is used to express generic vector -permutation/shuffle/swizzle operations. 
This builtin is also very important for -the implementation of various target-specific header files like -<tt>&lt;xmmintrin.h&gt;</tt>. -</p> - -<p><b>Syntax:</b></p> - -<pre> -__builtin_shufflevector(vec1, vec2, index1, index2, ...) -</pre> - -<p><b>Examples:</b></p> - -<pre> - // Identity operation - return 4-element vector V1. - __builtin_shufflevector(V1, V1, 0, 1, 2, 3) - - // "Splat" element 0 of V1 into a 4-element result. - __builtin_shufflevector(V1, V1, 0, 0, 0, 0) - - // Reverse 4-element vector V1. - __builtin_shufflevector(V1, V1, 3, 2, 1, 0) - - // Concatenate every other element of 4-element vectors V1 and V2. - __builtin_shufflevector(V1, V2, 0, 2, 4, 6) - - // Concatenate every other element of 8-element vectors V1 and V2. - __builtin_shufflevector(V1, V2, 0, 2, 4, 6, 8, 10, 12, 14) -</pre> - -<p><b>Description:</b></p> - -<p>The first two arguments to __builtin_shufflevector are vectors that have the -same element type. The remaining arguments are a list of integers that specify -the element indices of the first two vectors that should be extracted and -returned in a new vector. These element indices are numbered sequentially -starting with the first vector, continuing into the second vector. Thus, if -vec1 is a 4-element vector, index 5 would refer to the second element of vec2. -</p> - -<p>The result of __builtin_shufflevector is a vector -with the same element type as vec1/vec2 but that has an element count equal to -the number of indices specified. -</p> - -<p>Query for this feature with __has_builtin(__builtin_shufflevector).</p> - -<!-- ======================================================================= --> -<h3><a name="__builtin_unreachable">__builtin_unreachable</a></h3> -<!-- ======================================================================= --> - -<p><tt>__builtin_unreachable</tt> is used to indicate that a specific point in -the program cannot be reached, even if the compiler might otherwise think it -can.
This is useful to improve optimization and eliminate certain warnings. -For example, without the <tt>__builtin_unreachable</tt> in the example below, -the compiler assumes that the inline asm can fall through and prints a "function -declared 'noreturn' should not return" warning. -</p> - -<p><b>Syntax:</b></p> - -<pre> -__builtin_unreachable() -</pre> - -<p><b>Example of Use:</b></p> - -<pre> -void myabort(void) __attribute__((noreturn)); -void myabort(void) { - asm("int3"); - __builtin_unreachable(); -} -</pre> - -<p><b>Description:</b></p> - -<p>The __builtin_unreachable() builtin has completely undefined behavior. Since -it has undefined behavior, it is a statement that it is never reached and the -optimizer can take advantage of this to produce better code. This builtin takes -no arguments and produces a void result. -</p> - -<p>Query for this feature with __has_builtin(__builtin_unreachable).</p> - -<!-- ======================================================================= --> -<h3><a name="__sync_swap">__sync_swap</a></h3> -<!-- ======================================================================= --> - -<p><tt>__sync_swap</tt> is used to atomically swap integers or pointers in -memory. -</p> - -<p><b>Syntax:</b></p> - -<pre> -<i>type</i> __sync_swap(<i>type</i> *ptr, <i>type</i> value, ...) -</pre> - -<p><b>Example of Use:</b></p> - -<pre> -int old_value = __sync_swap(&amp;value, new_value); -</pre> - -<p><b>Description:</b></p> - -<p>The __sync_swap() builtin extends the existing __sync_*() family of atomic -intrinsics to allow code to atomically swap the current value with the new -value. More importantly, it helps developers write more efficient and correct -code by avoiding expensive loops around __sync_bool_compare_and_swap() or -relying on the platform specific implementation details of -__sync_lock_test_and_set(). The __sync_swap() builtin is a full barrier.
-</p> - -<!-- ======================================================================= --> -<h3><a name="__c11_atomic">__c11_atomic builtins</a></h3> -<!-- ======================================================================= --> - -<p>Clang provides a set of builtins which are intended to be used to implement -C11's <tt><stdatomic.h></tt> header. These builtins provide the semantics -of the <tt>_explicit</tt> form of the corresponding C11 operation, and are named -with a <tt>__c11_</tt> prefix. The supported operations are:</p> - -<ul> - <li><tt>__c11_atomic_init</tt></li> - <li><tt>__c11_atomic_thread_fence</tt></li> - <li><tt>__c11_atomic_signal_fence</tt></li> - <li><tt>__c11_atomic_is_lock_free</tt></li> - <li><tt>__c11_atomic_store</tt></li> - <li><tt>__c11_atomic_load</tt></li> - <li><tt>__c11_atomic_exchange</tt></li> - <li><tt>__c11_atomic_compare_exchange_strong</tt></li> - <li><tt>__c11_atomic_compare_exchange_weak</tt></li> - <li><tt>__c11_atomic_fetch_add</tt></li> - <li><tt>__c11_atomic_fetch_sub</tt></li> - <li><tt>__c11_atomic_fetch_and</tt></li> - <li><tt>__c11_atomic_fetch_or</tt></li> - <li><tt>__c11_atomic_fetch_xor</tt></li> -</ul> - -<!-- ======================================================================= --> -<h2 id="non-standard-attributes">Non-standard C++11 Attributes</h2> -<!-- ======================================================================= --> - -<p>Clang supports one non-standard C++11 attribute. It resides in the -<tt>clang</tt> attribute namespace.</p> - -<!-- ======================================================================= --> -<h3 id="clang__fallthrough">The <tt>clang::fallthrough</tt> attribute</h3> -<!-- ======================================================================= --> - -<p>The <tt>clang::fallthrough</tt> attribute is used along with the -<tt>-Wimplicit-fallthrough</tt> argument to annotate intentional fall-through -between switch labels. 
It can only be applied to a null statement placed at a -point of execution between any statement and the next switch label. It is common -to mark these places with a specific comment, but this attribute is meant to -replace comments with a more strict annotation, which can be checked by the -compiler. This attribute doesn't change semantics of the code and can be used -wherever an intended fall-through occurs. It is designed to mimic -control-flow statements like <tt>break;</tt>, so it can be placed in most places -where <tt>break;</tt> can, but only if there are no statements on the execution -path between it and the next switch label.</p> -<p>Here is an example:</p> -<pre> -// compile with -Wimplicit-fallthrough -switch (n) { -case 22: -case 33: // no warning: no statements between case labels - f(); -case 44: // warning: unannotated fall-through - g(); - <b>[[clang::fallthrough]];</b> -case 55: // no warning - if (x) { - h(); - break; - } - else { - i(); - <b>[[clang::fallthrough]];</b> - } -case 66: // no warning - p(); - <b>[[clang::fallthrough]];</b> // warning: fallthrough annotation does not directly precede case label - q(); -case 77: // warning: unannotated fall-through - r(); -} -</pre> - -<!-- ======================================================================= --> -<h2 id="targetspecific">Target-Specific Extensions</h2> -<!-- ======================================================================= --> - -<p>Clang supports some language features conditionally on some targets.</p> - -<!-- ======================================================================= --> -<h3 id="x86-specific">X86/X86-64 Language Extensions</h3> -<!-- ======================================================================= --> - -<p>The X86 backend has these language extensions:</p> - -<!-- ======================================================================= --> -<h4 id="x86-gs-segment">Memory references off the GS segment</h4> -<!-- 
======================================================================= --> - -<p>Annotating a pointer with address space #256 causes it to be code generated -relative to the X86 GS segment register, and address space #257 causes it to be -relative to the X86 FS segment. Note that this is a very low-level -feature that should only be used if you know what you're doing (for example in -an OS kernel).</p> - -<p>Here is an example:</p> - -<pre> -#define GS_RELATIVE __attribute__((address_space(256))) -int foo(int GS_RELATIVE *P) { - return *P; -} -</pre> - -<p>Which compiles to (on X86-32):</p> - -<pre> -_foo: - movl 4(%esp), %eax - movl %gs:(%eax), %eax - ret -</pre> - -<!-- ======================================================================= --> -<h2 id="analyzerspecific">Static Analysis-Specific Extensions</h2> -<!-- ======================================================================= --> - -<p>Clang supports additional attributes that are useful for documenting program -invariants and rules for static analysis tools. The extensions documented here -are used by the <a -href="http://clang.llvm.org/StaticAnalysis.html">path-sensitive static analyzer -engine</a> that is part of Clang's Analysis library.</p> - -<h3 id="attr_analyzer_noreturn">The <tt>analyzer_noreturn</tt> attribute</h3> - -<p>Clang's static analysis engine understands the standard <tt>noreturn</tt> -attribute. This attribute, which is typically affixed to a function prototype, -indicates that a call to a given function never returns. Function prototypes for -common functions like <tt>exit</tt> are typically annotated with this attribute, -as well as a variety of common assertion handlers. Users can educate the static -analyzer about their own custom assertion handlers (thus cutting down on false -positives due to false paths) by marking their own "panic" functions -with this attribute.</p> - -<p>While useful, <tt>noreturn</tt> is not applicable in all cases.
Sometimes -there are special functions that for all intents and purposes should be -considered panic functions (i.e., they are only called when an internal program -error occurs) but may actually return so that the program can fail gracefully. -The <tt>analyzer_noreturn</tt> attribute allows one to annotate such functions -as being interpreted as "no return" functions by the analyzer (thus -pruning bogus paths) but will not affect compilation (as in the case of -<tt>noreturn</tt>).</p> - -<p><b>Usage</b>: The <tt>analyzer_noreturn</tt> attribute can be placed in the -same places where the <tt>noreturn</tt> attribute can be placed. It is commonly -placed at the end of function prototypes:</p> - -<pre> - void foo() <b>__attribute__((analyzer_noreturn))</b>; -</pre> - -<p>Query for this feature with -<tt>__has_attribute(analyzer_noreturn)</tt>.</p> - -<h3 id="attr_method_family">The <tt>objc_method_family</tt> attribute</h3> - -<p>Many methods in Objective-C have conventional meanings determined -by their selectors. For the purposes of static analysis, it is -sometimes useful to be able to mark a method as having a particular -conventional meaning despite not having the right selector, or as not -having the conventional meaning that its selector would suggest. -For these use cases, we provide an attribute to specifically describe -the <q>method family</q> that a method belongs to.</p> - -<p><b>Usage</b>: <tt>__attribute__((objc_method_family(X)))</tt>, -where <tt>X</tt> is one of <tt>none</tt>, <tt>alloc</tt>, <tt>copy</tt>, -<tt>init</tt>, <tt>mutableCopy</tt>, or <tt>new</tt>. 
This attribute -can only be placed at the end of a method declaration:</p> - -<pre> - - (NSString*) initMyStringValue <b>__attribute__((objc_method_family(none)))</b>; -</pre> - -<p>Users who do not wish to change the conventional meaning of a -method, and who merely want to document its non-standard retain and -release semantics, should use the -<a href="#attr_retain_release">retaining behavior attributes</a> -described below.</p> - -<p>Query for this feature with -<tt>__has_attribute(objc_method_family)</tt>.</p> - -<h3 id="attr_retain_release">Objective-C retaining behavior attributes</h3> - -<p>In Objective-C, functions and methods are generally assumed to take -and return objects with +0 retain counts, with some exceptions for -special methods like <tt>+alloc</tt> and <tt>init</tt>. However, -there are exceptions, and so Clang provides attributes to allow these -exceptions to be documented, which helps the analyzer find leaks (and -ignore non-leaks). Some exceptions may be better described using -the <a href="#attr_method_family"><tt>objc_method_family</tt></a> -attribute instead.</p> - -<p><b>Usage</b>: The <tt>ns_returns_retained</tt>, <tt>ns_returns_not_retained</tt>, -<tt>ns_returns_autoreleased</tt>, <tt>cf_returns_retained</tt>, -and <tt>cf_returns_not_retained</tt> attributes can be placed on -methods and functions that return Objective-C or CoreFoundation -objects. They are commonly placed at the end of a function prototype -or method declaration:</p> - -<pre> - id foo() <b>__attribute__((ns_returns_retained))</b>; - - - (NSString*) bar: (int) x <b>__attribute__((ns_returns_retained))</b>; -</pre> - -<p>The <tt>*_returns_retained</tt> attributes specify that the -returned object has a +1 retain count. -The <tt>*_returns_not_retained</tt> attributes specify that the return -object has a +0 retain count, even if the normal convention for its -selector would be +1. 
<tt>ns_returns_autoreleased</tt> specifies that the -returned object is +0, but is guaranteed to live at least as long as the -next flush of an autorelease pool.</p> - -<p><b>Usage</b>: The <tt>ns_consumed</tt> and <tt>cf_consumed</tt> -attributes can be placed on an parameter declaration; they specify -that the argument is expected to have a +1 retain count, which will be -balanced in some way by the function or method. -The <tt>ns_consumes_self</tt> attribute can only be placed on an -Objective-C method; it specifies that the method expects -its <tt>self</tt> parameter to have a +1 retain count, which it will -balance in some way.</p> - -<pre> - void <b>foo(__attribute__((ns_consumed))</b> NSString *string); - - - (void) bar <b>__attribute__((ns_consumes_self))</b>; - - (void) baz: (id) <b>__attribute__((ns_consumed))</b> x; -</pre> - -<p>Query for these features with <tt>__has_attribute(ns_consumed)</tt>, -<tt>__has_attribute(ns_returns_retained)</tt>, etc.</p> - -<!-- ======================================================================= --> -<h2 id="dynamicanalyzerspecific">Dynamic Analysis-Specific Extensions</h2> -<!-- ======================================================================= --> -<h3 id="address_sanitizer">AddressSanitizer</h3> -<p> Use <code>__has_feature(address_sanitizer)</code> -to check if the code is being built with <a - href="AddressSanitizer.html">AddressSanitizer</a>. -</p> -<p>Use <tt>__attribute__((no_address_safety_analysis))</tt> on a function -declaration to specify that address safety instrumentation (e.g. -AddressSanitizer) should not be applied to that function. -</p> - -<!-- ======================================================================= --> -<h2 id="threadsafety">Thread-Safety Annotation Checking</h2> -<!-- ======================================================================= --> - -<p>Clang supports additional attributes for checking basic locking policies in -multithreaded programs. 
-Clang currently parses the following list of attributes, although -<b>the implementation for these annotations is currently in development.</b> -For more details, see the -<a href="http://gcc.gnu.org/wiki/ThreadSafetyAnnotation">GCC implementation</a>. -</p> - -<h4 id="ts_noanal">no_thread_safety_analysis</h4> - -<p>Use <tt>__attribute__((no_thread_safety_analysis))</tt> on a function -declaration to specify that the thread safety analysis should not be run on that -function. This attribute provides an escape hatch (e.g. for situations when it -is difficult to annotate the locking policy). </p> - -<h4 id="ts_lockable">lockable</h4> - -<p>Use <tt>__attribute__((lockable))</tt> on a class definition to specify -that it has a lockable type (e.g. a Mutex class). This annotation is primarily -used to check consistency.</p> - -<h4 id="ts_scopedlockable">scoped_lockable</h4> - -<p>Use <tt>__attribute__((scoped_lockable))</tt> on a class definition to -specify that it has a "scoped" lockable type. Objects of this type will acquire -the lock upon construction and release it upon going out of scope. 
- This annotation is primarily used to check -consistency.</p> - -<h4 id="ts_guardedvar">guarded_var</h4> - -<p>Use <tt>__attribute__((guarded_var))</tt> on a variable declaration to -specify that the variable must be accessed while holding some lock.</p> - -<h4 id="ts_ptguardedvar">pt_guarded_var</h4> - -<p>Use <tt>__attribute__((pt_guarded_var))</tt> on a pointer declaration to -specify that the pointer must be dereferenced while holding some lock.</p> - -<h4 id="ts_guardedby">guarded_by(l)</h4> - -<p>Use <tt>__attribute__((guarded_by(l)))</tt> on a variable declaration to -specify that the variable must be accessed while holding lock <tt>l</tt>.</p> - -<h4 id="ts_ptguardedby">pt_guarded_by(l)</h4> - -<p>Use <tt>__attribute__((pt_guarded_by(l)))</tt> on a pointer declaration to -specify that the pointer must be dereferenced while holding lock <tt>l</tt>.</p> - -<h4 id="ts_acquiredbefore">acquired_before(...)</h4> - -<p>Use <tt>__attribute__((acquired_before(...)))</tt> on a declaration -of a lockable variable to specify that the lock must be acquired before all -attribute arguments. Arguments must be lockable type, and there must be at -least one argument.</p> - -<h4 id="ts_acquiredafter">acquired_after(...)</h4> - -<p>Use <tt>__attribute__((acquired_after(...)))</tt> on a declaration -of a lockable variable to specify that the lock must be acquired after all -attribute arguments. Arguments must be lockable type, and there must be at -least one argument.</p> - -<h4 id="ts_elf">exclusive_lock_function(...)</h4> - -<p>Use <tt>__attribute__((exclusive_lock_function(...)))</tt> on a function -declaration to specify that the function acquires all listed locks -exclusively. This attribute takes zero or more arguments: either of lockable -type or integers indexing into function parameters of lockable type. 
If no -arguments are given, the acquired lock is implicitly <tt>this</tt> of the -enclosing object.</p> - -<h4 id="ts_slf">shared_lock_function(...)</h4> - -<p>Use <tt>__attribute__((shared_lock_function(...)))</tt> on a function -declaration to specify that the function acquires all listed locks, although - the locks may be shared (e.g. read locks). This attribute takes zero or more -arguments: either of lockable type or integers indexing into function -parameters of lockable type. If no arguments are given, the acquired lock is -implicitly <tt>this</tt> of the enclosing object.</p> - -<h4 id="ts_etf">exclusive_trylock_function(...)</h4> - -<p>Use <tt>__attribute__((exclusive_lock_function(...)))</tt> on a function -declaration to specify that the function will try (without blocking) to acquire -all listed locks exclusively. This attribute takes one or more arguments. The -first argument is an integer or boolean value specifying the return value of a -successful lock acquisition. The remaining arugments are either of lockable type -or integers indexing into function parameters of lockable type. If only one -argument is given, the acquired lock is implicitly <tt>this</tt> of the -enclosing object.</p> - -<h4 id="ts_stf">shared_trylock_function(...)</h4> - -<p>Use <tt>__attribute__((shared_lock_function(...)))</tt> on a function -declaration to specify that the function will try (without blocking) to acquire -all listed locks, although the locks may be shared (e.g. read locks). This -attribute takes one or more arguments. The first argument is an integer or -boolean value specifying the return value of a successful lock acquisition. The -remaining arugments are either of lockable type or integers indexing into -function parameters of lockable type. 
If only one argument is given, the -acquired lock is implicitly <tt>this</tt> of the enclosing object.</p> - -<h4 id="ts_uf">unlock_function(...)</h4> - -<p>Use <tt>__attribute__((unlock_function(...)))</tt> on a function -declaration to specify that the function release all listed locks. This -attribute takes zero or more arguments: either of lockable type or integers -indexing into function parameters of lockable type. If no arguments are given, -the acquired lock is implicitly <tt>this</tt> of the enclosing object.</p> - -<h4 id="ts_lr">lock_returned(l)</h4> - -<p>Use <tt>__attribute__((lock_returned(l)))</tt> on a function -declaration to specify that the function returns lock <tt>l</tt> (<tt>l</tt> -must be of lockable type). This annotation is used to aid in resolving lock -expressions.</p> - -<h4 id="ts_le">locks_excluded(...)</h4> - -<p>Use <tt>__attribute__((locks_excluded(...)))</tt> on a function declaration -to specify that the function must not be called with the listed locks. Arguments -must be lockable type, and there must be at least one argument.</p> - -<h4 id="ts_elr">exclusive_locks_required(...)</h4> - -<p>Use <tt>__attribute__((exclusive_locks_required(...)))</tt> on a function -declaration to specify that the function must be called while holding the listed -exclusive locks. Arguments must be lockable type, and there must be at -least one argument.</p> - -<h4 id="ts_slr">shared_locks_required(...)</h4> - -<p>Use <tt>__attribute__((shared_locks_required(...)))</tt> on a function -declaration to specify that the function must be called while holding the listed -shared locks. 
Arguments must be lockable type, and there must be at -least one argument.</p> - -<!-- ======================================================================= --> -<h2 id="type_safety">Type Safety Checking</h2> -<!-- ======================================================================= --> - -<p>Clang supports additional attributes to enable checking type safety -properties that can't be enforced by C type system. Usecases include:</p> -<ul> -<li>MPI library implementations, where these attributes enable checking that - buffer type matches the passed <tt>MPI_Datatype</tt>;</li> -<li>for HDF5 library there is a similar usecase as MPI;</li> -<li>checking types of variadic functions' arguments for functions like - <tt>fcntl()</tt> and <tt>ioctl()</tt>.</li> -</ul> - -<p>You can detect support for these attributes with __has_attribute(). For -example:</p> - -<blockquote> -<pre> -#if defined(__has_attribute) -# if __has_attribute(argument_with_type_tag) && \ - __has_attribute(pointer_with_type_tag) && \ - __has_attribute(type_tag_for_datatype) -# define ATTR_MPI_PWT(buffer_idx, type_idx) __attribute__((pointer_with_type_tag(mpi,buffer_idx,type_idx))) -/* ... other macros ... */ -# endif -#endif - -#if !defined(ATTR_MPI_PWT) -#define ATTR_MPI_PWT(buffer_idx, type_idx) -#endif - -int MPI_Send(void *buf, int count, MPI_Datatype datatype /*, other args omitted */) - ATTR_MPI_PWT(1,3); -</pre> -</blockquote> - -<h3 id="argument_with_type_tag"><tt>argument_with_type_tag(...)</tt></h3> - -<p>Use <tt>__attribute__((argument_with_type_tag(arg_kind, arg_idx, -type_tag_idx)))</tt> on a function declaration to specify that the function -accepts a type tag that determines the type of some other argument. 
-<tt>arg_kind</tt> is an identifier that should be used when annotating all -applicable type tags.</p> - -<p>This attribute is primarily useful for checking arguments of variadic -functions (<tt>pointer_with_type_tag</tt> can be used in most of non-variadic -cases).</p> - -<p>For example:</p> -<blockquote> -<pre> -int fcntl(int fd, int cmd, ...) - __attribute__(( argument_with_type_tag(fcntl,3,2) )); -</pre> -</blockquote> - -<h3 id="pointer_with_type_tag"><tt>pointer_with_type_tag(...)</tt></h3> - -<p>Use <tt>__attribute__((pointer_with_type_tag(ptr_kind, ptr_idx, -type_tag_idx)))</tt> on a function declaration to specify that the -function accepts a type tag that determines the pointee type of some other -pointer argument.</p> - -<p>For example:</p> -<blockquote> -<pre> -int MPI_Send(void *buf, int count, MPI_Datatype datatype /*, other args omitted */) - __attribute__(( pointer_with_type_tag(mpi,1,3) )); -</pre> -</blockquote> - -<h3 id="type_tag_for_datatype"><tt>type_tag_for_datatype(...)</tt></h3> - -<p>Clang supports annotating type tags of two forms.</p> - -<ul> -<li><b>Type tag that is an expression containing a reference to some declared -identifier.</b> Use <tt>__attribute__((type_tag_for_datatype(kind, type)))</tt> -on a declaration with that identifier: - -<blockquote> -<pre> -extern struct mpi_datatype mpi_datatype_int - __attribute__(( type_tag_for_datatype(mpi,int) )); -#define MPI_INT ((MPI_Datatype) &mpi_datatype_int) -</pre> -</blockquote></li> - -<li><b>Type tag that is an integral literal.</b> Introduce a <tt>static -const</tt> variable with a corresponding initializer value and attach -<tt>__attribute__((type_tag_for_datatype(kind, type)))</tt> on that -declaration, for example: - -<blockquote> -<pre> -#define MPI_INT ((MPI_Datatype) 42) -static const MPI_Datatype mpi_datatype_int - __attribute__(( type_tag_for_datatype(mpi,int) )) = 42 -</pre> -</blockquote></li> -</ul> - -<p>The attribute also accepts an optional third argument that 
determines how -the expression is compared to the type tag. There are two supported flags:</p> - -<ul><li><tt>layout_compatible</tt> will cause types to be compared according to -layout-compatibility rules (C++11 [class.mem] p 17, 18). This is -implemented to support annotating types like <tt>MPI_DOUBLE_INT</tt>. - -<p>For example:</p> -<blockquote> -<pre> -/* In mpi.h */ -struct internal_mpi_double_int { double d; int i; }; -extern struct mpi_datatype mpi_datatype_double_int - __attribute__(( type_tag_for_datatype(mpi, struct internal_mpi_double_int, - layout_compatible) )); - -#define MPI_DOUBLE_INT ((MPI_Datatype) &mpi_datatype_double_int) - -/* In user code */ -struct my_pair { double a; int b; }; -struct my_pair *buffer; -MPI_Send(buffer, 1, MPI_DOUBLE_INT /*, ... */); // no warning - -struct my_int_pair { int a; int b; } -struct my_int_pair *buffer2; -MPI_Send(buffer2, 1, MPI_DOUBLE_INT /*, ... */); // warning: actual buffer element - // type 'struct my_int_pair' - // doesn't match specified MPI_Datatype -</pre> -</blockquote> -</li> - -<li><tt>must_be_null</tt> specifies that the expression should be a null -pointer constant, for example: - -<blockquote> -<pre> -/* In mpi.h */ -extern struct mpi_datatype mpi_datatype_null - __attribute__(( type_tag_for_datatype(mpi, void, must_be_null) )); - -#define MPI_DATATYPE_NULL ((MPI_Datatype) &mpi_datatype_null) - -/* In user code */ -MPI_Send(buffer, 1, MPI_DATATYPE_NULL /*, ... */); // warning: MPI_DATATYPE_NULL - // was specified but buffer - // is not a null pointer -</pre> -</blockquote> -</li> -</ul> - -</div> -</body> -</html> diff --git a/docs/LanguageExtensions.rst b/docs/LanguageExtensions.rst new file mode 100644 index 0000000..c870d20 --- /dev/null +++ b/docs/LanguageExtensions.rst @@ -0,0 +1,2000 @@ +========================= +Clang Language Extensions +========================= + +.. contents:: + :local: + :depth: 1 + +.. 
toctree:: + :hidden: + + ObjectiveCLiterals + BlockLanguageSpec + Block-ABI-Apple + AutomaticReferenceCounting + +Introduction +============ + +This document describes the language extensions provided by Clang. In addition +to the language extensions listed here, Clang aims to support a broad range of +GCC extensions. Please see the `GCC manual +<http://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html>`_ for more information on +these extensions. + +.. _langext-feature_check: + +Feature Checking Macros +======================= + +Language extensions can be very useful, but only if you know you can depend on +them. In order to allow fine-grain features checks, we support three builtin +function-like macros. This allows you to directly test for a feature in your +code without having to resort to something like autoconf or fragile "compiler +version checks". + +``__has_builtin`` +----------------- + +This function-like macro takes a single identifier argument that is the name of +a builtin function. It evaluates to 1 if the builtin is supported or 0 if not. +It can be used like this: + +.. code-block:: c++ + + #ifndef __has_builtin // Optional of course. + #define __has_builtin(x) 0 // Compatibility with non-clang compilers. + #endif + + ... + #if __has_builtin(__builtin_trap) + __builtin_trap(); + #else + abort(); + #endif + ... + +.. _langext-__has_feature-__has_extension: + +``__has_feature`` and ``__has_extension`` +----------------------------------------- + +These function-like macros take a single identifier argument that is the name +of a feature. ``__has_feature`` evaluates to 1 if the feature is both +supported by Clang and standardized in the current language standard or 0 if +not (but see :ref:`below <langext-has-feature-back-compat>`), while +``__has_extension`` evaluates to 1 if the feature is supported by Clang in the +current language (either as a language extension or a standard language +feature) or 0 if not. They can be used like this: + +.. 
code-block:: c++ + + #ifndef __has_feature // Optional of course. + #define __has_feature(x) 0 // Compatibility with non-clang compilers. + #endif + #ifndef __has_extension + #define __has_extension __has_feature // Compatibility with pre-3.0 compilers. + #endif + + ... + #if __has_feature(cxx_rvalue_references) + // This code will only be compiled with the -std=c++11 and -std=gnu++11 + // options, because rvalue references are only standardized in C++11. + #endif + + #if __has_extension(cxx_rvalue_references) + // This code will be compiled with the -std=c++11, -std=gnu++11, -std=c++98 + // and -std=gnu++98 options, because rvalue references are supported as a + // language extension in C++98. + #endif + +.. _langext-has-feature-back-compat: + +For backwards compatibility reasons, ``__has_feature`` can also be used to test +for support for non-standardized features, i.e. features not prefixed ``c_``, +``cxx_`` or ``objc_``. + +Another use of ``__has_feature`` is to check for compiler features not related +to the language standard, such as e.g. :doc:`AddressSanitizer +<AddressSanitizer>`. + +If the ``-pedantic-errors`` option is given, ``__has_extension`` is equivalent +to ``__has_feature``. + +The feature tag is described along with the language feature below. + +The feature name or extension name can also be specified with a preceding and +following ``__`` (double underscore) to avoid interference from a macro with +the same name. For instance, ``__cxx_rvalue_references__`` can be used instead +of ``cxx_rvalue_references``. + +``__has_attribute`` +------------------- + +This function-like macro takes a single identifier argument that is the name of +an attribute. It evaluates to 1 if the attribute is supported or 0 if not. It +can be used like this: + +.. code-block:: c++ + + #ifndef __has_attribute // Optional of course. + #define __has_attribute(x) 0 // Compatibility with non-clang compilers. + #endif + + ... 
+ #if __has_attribute(always_inline) + #define ALWAYS_INLINE __attribute__((always_inline)) + #else + #define ALWAYS_INLINE + #endif + ... + +The attribute name can also be specified with a preceding and following ``__`` +(double underscore) to avoid interference from a macro with the same name. For +instance, ``__always_inline__`` can be used instead of ``always_inline``. + +Include File Checking Macros +============================ + +Not all development systems have the same include files. The +:ref:`langext-__has_include` and :ref:`langext-__has_include_next` macros allow +you to check for the existence of an include file before doing a possibly +failing ``#include`` directive. Include file checking macros must be used +as expressions in ``#if`` or ``#elif`` preprocessing directives. + +.. _langext-__has_include: + +``__has_include`` +----------------- + +This function-like macro takes a single file name string argument that is the +name of an include file. It evaluates to 1 if the file can be found using the +include paths, or 0 otherwise: + +.. code-block:: c++ + + // Note the two possible file name string formats. + #if __has_include("myinclude.h") && __has_include(<stdint.h>) + # include "myinclude.h" + #endif + + // To avoid problem with non-clang compilers not having this macro. + #if defined(__has_include) && __has_include("myinclude.h") + # include "myinclude.h" + #endif + +To test for this feature, use ``#if defined(__has_include)``. + +.. _langext-__has_include_next: + +``__has_include_next`` +---------------------- + +This function-like macro takes a single file name string argument that is the +name of an include file. It is like ``__has_include`` except that it looks for +the second instance of the given file found in the include paths. It evaluates +to 1 if the second instance of the file can be found using the include paths, +or 0 otherwise: + +.. code-block:: c++ + + // Note the two possible file name string formats.
+ #if __has_include_next("myinclude.h") && __has_include_next(<stdint.h>) + # include_next "myinclude.h" + #endif + + // To avoid problem with non-clang compilers not having this macro. + #if defined(__has_include_next) && __has_include_next("myinclude.h") + # include_next "myinclude.h" + #endif + +Note that ``__has_include_next``, like the GNU extension ``#include_next`` +directive, is intended for use in headers only, and will issue a warning if +used in the top-level compilation file. A warning will also be issued if an +absolute path is used in the file argument. + +``__has_warning`` +----------------- + +This function-like macro takes a string literal that represents a command line +option for a warning and returns true if that is a valid warning option. + +.. code-block:: c++ + + #if __has_warning("-Wformat") + ... + #endif + +Builtin Macros +============== + +``__BASE_FILE__`` + Defined to a string that contains the name of the main input file passed to + Clang. + +``__COUNTER__`` + Defined to an integer value that starts at zero and is incremented each time + the ``__COUNTER__`` macro is expanded. + +``__INCLUDE_LEVEL__`` + Defined to an integral value that is the include depth of the file currently + being translated. For the main file, this value is zero. + +``__TIMESTAMP__`` + Defined to the date and time of the last modification of the current source + file. + +``__clang__`` + Defined when compiling with Clang + +``__clang_major__`` + Defined to the major marketing version number of Clang (e.g., the 2 in + 2.0.1). Note that marketing version numbers should not be used to check for + language features, as different vendors use different numbering schemes. + Instead, use the :ref:`langext-feature_check`. + +``__clang_minor__`` + Defined to the minor version number of Clang (e.g., the 0 in 2.0.1). Note + that marketing version numbers should not be used to check for language + features, as different vendors use different numbering schemes. 
Instead, use + the :ref:`langext-feature_check`. + +``__clang_patchlevel__`` + Defined to the marketing patch level of Clang (e.g., the 1 in 2.0.1). + +``__clang_version__`` + Defined to a string that captures the Clang marketing version, including the + Subversion tag or revision number, e.g., "``1.5 (trunk 102332)``". + +.. _langext-vectors: + +Vectors and Extended Vectors +============================ + +Supports the GCC, OpenCL, AltiVec and NEON vector extensions. + +OpenCL vector types are created using the ``ext_vector_type`` attribute. It +supports the ``V.xyzw`` syntax and other tidbits as seen in OpenCL. An example +is: + +.. code-block:: c++ + + typedef float float4 __attribute__((ext_vector_type(4))); + typedef float float2 __attribute__((ext_vector_type(2))); + + float4 foo(float2 a, float2 b) { + float4 c; + c.xz = a; + c.yw = b; + return c; + } + +Query for this feature with ``__has_extension(attribute_ext_vector_type)``. + +Giving ``-faltivec`` option to clang enables support for AltiVec vector syntax +and functions. For example: + +.. code-block:: c++ + + vector float foo(vector int a) { + vector int b; + b = vec_add(a, a) + a; + return (vector float)b; + } + +NEON vector types are created using ``neon_vector_type`` and +``neon_polyvector_type`` attributes. For example: + +.. code-block:: c++ + + typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t; + typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t; + + int8x8_t foo(int8x8_t a) { + int8x8_t v; + v = a; + return v; + } + +Vector Literals +--------------- + +Vector literals can be used to create vectors from a set of scalars, or +vectors. Either parentheses or braces form can be used. In the parentheses +form the number of literal values specified must be one, i.e. referring to a +scalar value, or must match the size of the vector type being created.
If a +single scalar literal value is specified, the scalar literal value will be +replicated to all the components of the vector type. In the braces form any +number of literals can be specified. For example: + +.. code-block:: c++ + + typedef int v4si __attribute__((__vector_size__(16))); + typedef float float4 __attribute__((ext_vector_type(4))); + typedef float float2 __attribute__((ext_vector_type(2))); + + v4si vsi = (v4si){1, 2, 3, 4}; + float4 vf = (float4)(1.0f, 2.0f, 3.0f, 4.0f); + vector int vi1 = (vector int)(1); // vi1 will be (1, 1, 1, 1). + vector int vi2 = (vector int){1}; // vi2 will be (1, 0, 0, 0). + vector int vi3 = (vector int)(1, 2); // error + vector int vi4 = (vector int){1, 2}; // vi4 will be (1, 2, 0, 0). + vector int vi5 = (vector int)(1, 2, 3, 4); + float4 vf = (float4)((float2)(1.0f, 2.0f), (float2)(3.0f, 4.0f)); + +Vector Operations +----------------- + +The table below shows the support for each operation by vector extension. A +dash indicates that an operation is not accepted according to a corresponding +specification. + +============================== ====== ======= === ==== + Operator OpenCL AltiVec GCC NEON +============================== ====== ======= === ==== +[] yes yes yes -- +unary operators +, -- yes yes yes -- +++, -- -- yes yes yes -- ++,--,*,/,% yes yes yes -- +bitwise operators &,|,^,~ yes yes yes -- +>>,<< yes yes yes -- +!, &&, || no -- -- -- +==, !=, >, <, >=, <= yes yes -- -- += yes yes yes yes +:? yes -- -- -- +sizeof yes yes yes yes +============================== ====== ======= === ==== + +See also :ref:`langext-__builtin_shufflevector`. + +Messages on ``deprecated`` and ``unavailable`` Attributes +========================================================= + +An optional string message can be added to the ``deprecated`` and +``unavailable`` attributes. For example: + +..
code-block:: c++ + + void explode(void) __attribute__((deprecated("extremely unsafe, use 'combust' instead!!!"))); + +If the deprecated or unavailable declaration is used, the message will be +incorporated into the appropriate diagnostic: + +.. code-block:: c++ + + harmless.c:4:3: warning: 'explode' is deprecated: extremely unsafe, use 'combust' instead!!! + [-Wdeprecated-declarations] + explode(); + ^ + +Query for this feature with +``__has_extension(attribute_deprecated_with_message)`` and +``__has_extension(attribute_unavailable_with_message)``. + +Attributes on Enumerators +========================= + +Clang allows attributes to be written on individual enumerators. This allows +enumerators to be deprecated, made unavailable, etc. The attribute must appear +after the enumerator name and before any initializer, like so: + +.. code-block:: c++ + + enum OperationMode { + OM_Invalid, + OM_Normal, + OM_Terrified __attribute__((deprecated)), + OM_AbortOnError __attribute__((deprecated)) = 4 + }; + +Attributes on the ``enum`` declaration do not apply to individual enumerators. + +Query for this feature with ``__has_extension(enumerator_attributes)``. + +'User-Specified' System Frameworks +================================== + +Clang provides a mechanism by which frameworks can be built in such a way that +they will always be treated as being "system frameworks", even if they are not +present in a system framework directory. This can be useful to system +framework developers who want to be able to test building other applications +with development builds of their framework, including the manner in which the +compiler changes warning behavior for system headers. + +Framework developers can opt-in to this mechanism by creating a +"``.system_framework``" file at the top-level of their framework. That is, the +framework should have contents like: + +.. 
code-block:: none + + .../TestFramework.framework + .../TestFramework.framework/.system_framework + .../TestFramework.framework/Headers + .../TestFramework.framework/Headers/TestFramework.h + ... + +Clang will treat the presence of this file as an indicator that the framework +should be treated as a system framework, regardless of how it was found in the +framework search path. For consistency, we recommend that such files never be +included in installed versions of the framework. + +Availability attribute +====================== + +Clang introduces the ``availability`` attribute, which can be placed on +declarations to describe the lifecycle of that declaration relative to +operating system versions. Consider the function declaration for a +hypothetical function ``f``: + +.. code-block:: c++ + + void f(void) __attribute__((availability(macosx,introduced=10.4,deprecated=10.6,obsoleted=10.7))); + +The availability attribute states that ``f`` was introduced in Mac OS X 10.4, +deprecated in Mac OS X 10.6, and obsoleted in Mac OS X 10.7. This information +is used by Clang to determine when it is safe to use ``f``: for example, if +Clang is instructed to compile code for Mac OS X 10.5, a call to ``f()`` +succeeds. If Clang is instructed to compile code for Mac OS X 10.6, the call +succeeds but Clang emits a warning specifying that the function is deprecated. +Finally, if Clang is instructed to compile code for Mac OS X 10.7, the call +fails because ``f()`` is no longer available. + +The availability attribute is a comma-separated list starting with the +platform name and then including clauses specifying important milestones in the +declaration's lifetime (in any order) along with additional information. Those +clauses can be: + +introduced=\ *version* + The first version in which this declaration was introduced. + +deprecated=\ *version* + The first version in which this declaration was deprecated, meaning that + users should migrate away from this API. 
+ +obsoleted=\ *version* + The first version in which this declaration was obsoleted, meaning that it + was removed completely and can no longer be used. + +unavailable + This declaration is never available on this platform. + +message=\ *string-literal* + Additional message text that Clang will provide when emitting a warning or + error about use of a deprecated or obsoleted declaration. Useful to direct + users to replacement APIs. + +Multiple availability attributes can be placed on a declaration, which may +correspond to different platforms. Only the availability attribute with the +platform corresponding to the target platform will be used; any others will be +ignored. If no availability attribute specifies availability for the current +target platform, the availability attributes are ignored. Supported platforms +are: + +``ios`` + Apple's iOS operating system. The minimum deployment target is specified by + the ``-mios-version-min=*version*`` or ``-miphoneos-version-min=*version*`` + command-line arguments. + +``macosx`` + Apple's Mac OS X operating system. The minimum deployment target is + specified by the ``-mmacosx-version-min=*version*`` command-line argument. + +A declaration can be used even when deploying back to a platform version prior +to when the declaration was introduced. When this happens, the declaration is +`weakly linked +<https://developer.apple.com/library/mac/#documentation/MacOSX/Conceptual/BPFrameworks/Concepts/WeakLinking.html>`_, +as if the ``weak_import`` attribute were added to the declaration. A +weakly-linked declaration may or may not be present at run-time, and a program +can determine whether the declaration is present by checking whether the +address of that declaration is non-NULL. + +If there are multiple declarations of the same entity, the availability +attributes must either match on a per-platform basis or later +declarations must not have availability attributes for that +platform. For example: + +..
code-block:: c + + void g(void) __attribute__((availability(macosx,introduced=10.4))); + void g(void) __attribute__((availability(macosx,introduced=10.4))); // okay, matches + void g(void) __attribute__((availability(ios,introduced=4.0))); // okay, adds a new platform + void g(void); // okay, inherits both macosx and ios availability from above. + void g(void) __attribute__((availability(macosx,introduced=10.5))); // error: mismatch + +When one method overrides another, the overriding method can be more widely available than the overridden method, e.g.,: + +.. code-block:: objc + + @interface A + - (id)method __attribute__((availability(macosx,introduced=10.4))); + - (id)method2 __attribute__((availability(macosx,introduced=10.4))); + @end + + @interface B : A + - (id)method __attribute__((availability(macosx,introduced=10.3))); // okay: method moved into base class later + - (id)method __attribute__((availability(macosx,introduced=10.5))); // error: this method was available via the base class in 10.4 + @end + +Checks for Standard Language Features +===================================== + +The ``__has_feature`` macro can be used to query if certain standard language +features are enabled. The ``__has_extension`` macro can be used to query if +language features are available as an extension when compiling for a standard +which does not provide them. The features which can be tested are listed here. + +C++98 +----- + +The features listed below are part of the C++98 standard. These features are +enabled by default when compiling C++ code. + +C++ exceptions +^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_exceptions)`` to determine if C++ exceptions have been +enabled. For example, compiling code with ``-fno-exceptions`` disables C++ +exceptions. + +C++ RTTI +^^^^^^^^ + +Use ``__has_feature(cxx_rtti)`` to determine if C++ RTTI has been enabled. For +example, compiling code with ``-fno-rtti`` disables the use of RTTI. 
+ +C++11 +----- + +The features listed below are part of the C++11 standard. As a result, all +these features are enabled with the ``-std=c++11`` or ``-std=gnu++11`` option +when compiling C++ code. + +C++11 SFINAE includes access control +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_access_control_sfinae)`` or +``__has_extension(cxx_access_control_sfinae)`` to determine whether +access-control errors (e.g., calling a private constructor) are considered to +be template argument deduction errors (aka SFINAE errors), per `C++ DR1170 +<http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1170>`_. + +C++11 alias templates +^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_alias_templates)`` or +``__has_extension(cxx_alias_templates)`` to determine if support for C++11's +alias declarations and alias templates is enabled. + +C++11 alignment specifiers +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_alignas)`` or ``__has_extension(cxx_alignas)`` to +determine if support for alignment specifiers using ``alignas`` is enabled. + +C++11 attributes +^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_attributes)`` or ``__has_extension(cxx_attributes)`` to +determine if support for attribute parsing with C++11's square bracket notation +is enabled. + +C++11 generalized constant expressions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_constexpr)`` to determine if support for generalized +constant expressions (e.g., ``constexpr``) is enabled. + +C++11 ``decltype()`` +^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_decltype)`` or ``__has_extension(cxx_decltype)`` to +determine if support for the ``decltype()`` specifier is enabled. C++11's +``decltype`` does not require type-completeness of a function call expression. +Use ``__has_feature(cxx_decltype_incomplete_return_types)`` or +``__has_extension(cxx_decltype_incomplete_return_types)`` to determine if +support for this feature is enabled. 
+ +C++11 default template arguments in function templates +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_default_function_template_args)`` or +``__has_extension(cxx_default_function_template_args)`` to determine if support +for default template arguments in function templates is enabled. + +C++11 ``default``\ ed functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_defaulted_functions)`` or +``__has_extension(cxx_defaulted_functions)`` to determine if support for +defaulted function definitions (with ``= default``) is enabled. + +C++11 delegating constructors +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_delegating_constructors)`` to determine if support for +delegating constructors is enabled. + +C++11 ``deleted`` functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_deleted_functions)`` or +``__has_extension(cxx_deleted_functions)`` to determine if support for deleted +function definitions (with ``= delete``) is enabled. + +C++11 explicit conversion functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_explicit_conversions)`` to determine if support for +``explicit`` conversion functions is enabled. + +C++11 generalized initializers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_generalized_initializers)`` to determine if support for +generalized initializers (using braced lists and ``std::initializer_list``) is +enabled. + +C++11 implicit move constructors/assignment operators +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_implicit_moves)`` to determine if Clang will implicitly +generate move constructors and move assignment operators where needed. + +C++11 inheriting constructors +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_inheriting_constructors)`` to determine if support for +inheriting constructors is enabled. Clang does not currently implement this +feature. 
+ +C++11 inline namespaces +^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_inline_namespaces)`` or +``__has_extension(cxx_inline_namespaces)`` to determine if support for inline +namespaces is enabled. + +C++11 lambdas +^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_lambdas)`` or ``__has_extension(cxx_lambdas)`` to +determine if support for lambdas is enabled. + +C++11 local and unnamed types as template arguments +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_local_type_template_args)`` or +``__has_extension(cxx_local_type_template_args)`` to determine if support for +local and unnamed types as template arguments is enabled. + +C++11 noexcept +^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_noexcept)`` or ``__has_extension(cxx_noexcept)`` to +determine if support for noexcept exception specifications is enabled. + +C++11 in-class non-static data member initialization +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_nonstatic_member_init)`` to determine whether in-class +initialization of non-static data members is enabled. + +C++11 ``nullptr`` +^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_nullptr)`` or ``__has_extension(cxx_nullptr)`` to +determine if support for ``nullptr`` is enabled. + +C++11 ``override control`` +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_override_control)`` or +``__has_extension(cxx_override_control)`` to determine if support for the +override control keywords is enabled. + +C++11 reference-qualified functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_reference_qualified_functions)`` or +``__has_extension(cxx_reference_qualified_functions)`` to determine if support +for reference-qualified functions (e.g., member functions with ``&`` or ``&&`` +applied to ``*this``) is enabled. 
+ +C++11 range-based ``for`` loop +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_range_for)`` or ``__has_extension(cxx_range_for)`` to +determine if support for the range-based for loop is enabled. + +C++11 raw string literals +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_raw_string_literals)`` to determine if support for raw +string literals (e.g., ``R"x(foo\bar)x"``) is enabled. + +C++11 rvalue references +^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_rvalue_references)`` or +``__has_extension(cxx_rvalue_references)`` to determine if support for rvalue +references is enabled. + +C++11 ``static_assert()`` +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_static_assert)`` or +``__has_extension(cxx_static_assert)`` to determine if support for compile-time +assertions using ``static_assert`` is enabled. + +C++11 type inference +^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_auto_type)`` or ``__has_extension(cxx_auto_type)`` to +determine C++11 type inference is supported using the ``auto`` specifier. If +this is disabled, ``auto`` will instead be a storage class specifier, as in C +or C++98. + +C++11 strongly typed enumerations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_strong_enums)`` or +``__has_extension(cxx_strong_enums)`` to determine if support for strongly +typed, scoped enumerations is enabled. + +C++11 trailing return type +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_trailing_return)`` or +``__has_extension(cxx_trailing_return)`` to determine if support for the +alternate function declaration syntax with trailing return type is enabled. + +C++11 Unicode string literals +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_unicode_literals)`` to determine if support for Unicode +string literals is enabled. + +C++11 unrestricted unions +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_unrestricted_unions)`` to determine if support for +unrestricted unions is enabled. 
+ +C++11 user-defined literals +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_user_literals)`` to determine if support for +user-defined literals is enabled. + +C++11 variadic templates +^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(cxx_variadic_templates)`` or +``__has_extension(cxx_variadic_templates)`` to determine if support for +variadic templates is enabled. + +C11 +--- + +The features listed below are part of the C11 standard. As a result, all these +features are enabled with the ``-std=c11`` or ``-std=gnu11`` option when +compiling C code. Additionally, because these features are all +backward-compatible, they are available as extensions in all language modes. + +C11 alignment specifiers +^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(c_alignas)`` or ``__has_extension(c_alignas)`` to determine +if support for alignment specifiers using ``_Alignas`` is enabled. + +C11 atomic operations +^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(c_atomic)`` or ``__has_extension(c_atomic)`` to determine +if support for atomic types using ``_Atomic`` is enabled. Clang also provides +:ref:`a set of builtins <langext-__c11_atomic>` which can be used to implement +the ``<stdatomic.h>`` operations on ``_Atomic`` types. + +C11 generic selections +^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(c_generic_selections)`` or +``__has_extension(c_generic_selections)`` to determine if support for generic +selections is enabled. + +As an extension, the C11 generic selection expression is available in all +languages supported by Clang. The syntax is the same as that given in the C11 +standard. + +In C, type compatibility is decided according to the rules given in the +appropriate standard, but in C++, which lacks the type compatibility rules used +in C, types are considered compatible only if they are equivalent. 
+ +C11 ``_Static_assert()`` +^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``__has_feature(c_static_assert)`` or ``__has_extension(c_static_assert)`` +to determine if support for compile-time assertions using ``_Static_assert`` is +enabled. + +Checks for Type Traits +====================== + +Clang supports the `GNU C++ type traits +<http://gcc.gnu.org/onlinedocs/gcc/Type-Traits.html>`_ and a subset of the +`Microsoft Visual C++ Type traits +<http://msdn.microsoft.com/en-us/library/ms177194(v=VS.100).aspx>`_. For each +supported type trait ``__X``, ``__has_extension(X)`` indicates the presence of +the type trait. For example: + +.. code-block:: c++ + + #if __has_extension(is_convertible_to) + template<typename From, typename To> + struct is_convertible_to { + static const bool value = __is_convertible_to(From, To); + }; + #else + // Emulate type trait + #endif + +The following type traits are supported by Clang: + +* ``__has_nothrow_assign`` (GNU, Microsoft) +* ``__has_nothrow_copy`` (GNU, Microsoft) +* ``__has_nothrow_constructor`` (GNU, Microsoft) +* ``__has_trivial_assign`` (GNU, Microsoft) +* ``__has_trivial_copy`` (GNU, Microsoft) +* ``__has_trivial_constructor`` (GNU, Microsoft) +* ``__has_trivial_destructor`` (GNU, Microsoft) +* ``__has_virtual_destructor`` (GNU, Microsoft) +* ``__is_abstract`` (GNU, Microsoft) +* ``__is_base_of`` (GNU, Microsoft) +* ``__is_class`` (GNU, Microsoft) +* ``__is_convertible_to`` (Microsoft) +* ``__is_empty`` (GNU, Microsoft) +* ``__is_enum`` (GNU, Microsoft) +* ``__is_interface_class`` (Microsoft) +* ``__is_pod`` (GNU, Microsoft) +* ``__is_polymorphic`` (GNU, Microsoft) +* ``__is_union`` (GNU, Microsoft) +* ``__is_literal(type)``: Determines whether the given type is a literal type +* ``__is_final``: Determines whether the given type is declared with a + ``final`` class-virt-specifier. +* ``__underlying_type(type)``: Retrieves the underlying type for a given + ``enum`` type. This trait is required to implement the C++11 standard + library. 
+* ``__is_trivially_assignable(totype, fromtype)``: Determines whether a value + of type ``totype`` can be assigned to from a value of type ``fromtype`` such + that no non-trivial functions are called as part of that assignment. This + trait is required to implement the C++11 standard library. +* ``__is_trivially_constructible(type, argtypes...)``: Determines whether a + value of type ``type`` can be direct-initialized with arguments of types + ``argtypes...`` such that no non-trivial functions are called as part of + that initialization. This trait is required to implement the C++11 standard + library. + +Blocks +====== + +The syntax and high level language feature description is in +:doc:`BlockLanguageSpec<BlockLanguageSpec>`. Implementation and ABI details for +the clang implementation are in :doc:`Block-ABI-Apple<Block-ABI-Apple>`. + +Query for this feature with ``__has_extension(blocks)``. + +Objective-C Features +==================== + +Related result types +-------------------- + +According to Cocoa conventions, Objective-C methods with certain names +("``init``", "``alloc``", etc.) always return objects that are an instance of +the receiving class's type. Such methods are said to have a "related result +type", meaning that a message send to one of these methods will have the same +static type as an instance of the receiver class. For example, given the +following classes: + +.. code-block:: objc + + @interface NSObject + + (id)alloc; + - (id)init; + @end + + @interface NSArray : NSObject + @end + +and this common initialization pattern + +.. code-block:: objc + + NSArray *array = [[NSArray alloc] init]; + +the type of the expression ``[NSArray alloc]`` is ``NSArray*`` because +``alloc`` implicitly has a related result type. Similarly, the type of the +expression ``[[NSArray alloc] init]`` is ``NSArray*``, since ``init`` has a +related result type and its receiver is known to have the type ``NSArray *``. 
+If neither ``alloc`` nor ``init`` had a related result type, the expressions +would have had type ``id``, as declared in the method signature. + +A method with a related result type can be declared by using the type +``instancetype`` as its result type. ``instancetype`` is a contextual keyword +that is only permitted in the result type of an Objective-C method, e.g. + +.. code-block:: objc + + @interface A + + (instancetype)constructAnA; + @end + +The related result type can also be inferred for some methods. To determine +whether a method has an inferred related result type, the first word in the +camel-case selector (e.g., "``init``" in "``initWithObjects``") is considered, +and the method will have a related result type if its return type is compatible +with the type of its class and if: + +* the first word is "``alloc``" or "``new``", and the method is a class method, + or + +* the first word is "``autorelease``", "``init``", "``retain``", or "``self``", + and the method is an instance method. + +If a method with a related result type is overridden by a subclass method, the +subclass method must also return a type that is compatible with the subclass +type. For example: + +.. code-block:: objc + + @interface NSString : NSObject + - (NSUnrelated *)init; // incorrect usage: NSUnrelated is not NSString or a superclass of NSString + @end + +Related result types only affect the type of a message send or property access +via the given method. In all other respects, a method with a related result +type is treated the same way as method that returns ``id``. + +Use ``__has_feature(objc_instancetype)`` to determine whether the +``instancetype`` contextual keyword is available. + +Automatic reference counting +---------------------------- + +Clang provides support for :doc:`automated reference counting +<AutomaticReferenceCounting>` in Objective-C, which eliminates the need +for manual ``retain``/``release``/``autorelease`` message sends. 
There are two +feature macros associated with automatic reference counting: +``__has_feature(objc_arc)`` indicates the availability of automated reference +counting in general, while ``__has_feature(objc_arc_weak)`` indicates that +automated reference counting also includes support for ``__weak`` pointers to +Objective-C objects. + +.. _objc-fixed-enum: + +Enumerations with a fixed underlying type +----------------------------------------- + +Clang provides support for C++11 enumerations with a fixed underlying type +within Objective-C. For example, one can write an enumeration type as: + +.. code-block:: c++ + + typedef enum : unsigned char { Red, Green, Blue } Color; + +This specifies that the underlying type, which is used to store the enumeration +value, is ``unsigned char``. + +Use ``__has_feature(objc_fixed_enum)`` to determine whether support for fixed +underlying types is available in Objective-C. + +Interoperability with C++11 lambdas +----------------------------------- + +Clang provides interoperability between C++11 lambdas and blocks-based APIs, by +permitting a lambda to be implicitly converted to a block pointer with the +corresponding signature. For example, consider an API such as ``NSArray``'s +array-sorting method: + +.. code-block:: objc + + - (NSArray *)sortedArrayUsingComparator:(NSComparator)cmptr; + +``NSComparator`` is simply a typedef for the block pointer ``NSComparisonResult +(^)(id, id)``, and parameters of this type are generally provided with block +literals as arguments. However, one can also use a C++11 lambda so long as it +provides the same signature (in this case, accepting two parameters of type +``id`` and returning an ``NSComparisonResult``): + +.. 
code-block:: objc + + NSArray *array = @[@"string 1", @"string 21", @"string 12", @"String 11", + @"String 02"]; + const NSStringCompareOptions comparisonOptions + = NSCaseInsensitiveSearch | NSNumericSearch | + NSWidthInsensitiveSearch | NSForcedOrderingSearch; + NSLocale *currentLocale = [NSLocale currentLocale]; + NSArray *sorted + = [array sortedArrayUsingComparator:[=](id s1, id s2) -> NSComparisonResult { + NSRange string1Range = NSMakeRange(0, [s1 length]); + return [s1 compare:s2 options:comparisonOptions + range:string1Range locale:currentLocale]; + }]; + NSLog(@"sorted: %@", sorted); + +This code relies on an implicit conversion from the type of the lambda +expression (an unnamed, local class type called the *closure type*) to the +corresponding block pointer type. The conversion itself is expressed by a +conversion operator in that closure type that produces a block pointer with the +same signature as the lambda itself, e.g., + +.. code-block:: objc + + operator NSComparisonResult (^)(id, id)() const; + +This conversion function returns a new block that simply forwards the two +parameters to the lambda object (which it captures by copy), then returns the +result. The returned block is first copied (with ``Block_copy``) and then +autoreleased. As an optimization, if a lambda expression is immediately +converted to a block pointer (as in the first example, above), then the block +is not copied and autoreleased: rather, it is given the same lifetime as a +block literal written at that point in the program, which avoids the overhead +of copying a block to the heap in the common case. + +The conversion from a lambda to a block pointer is only available in +Objective-C++, and not in C++ with blocks, due to its use of Objective-C memory +management (autorelease). 
+ +Object Literals and Subscripting +-------------------------------- + +Clang provides support for :doc:`Object Literals and Subscripting +<ObjectiveCLiterals>` in Objective-C, which simplifies common Objective-C +programming patterns, makes programs more concise, and improves the safety of +container creation. There are several feature macros associated with object +literals and subscripting: ``__has_feature(objc_array_literals)`` tests the +availability of array literals; ``__has_feature(objc_dictionary_literals)`` +tests the availability of dictionary literals; +``__has_feature(objc_subscripting)`` tests the availability of object +subscripting. + +Objective-C Autosynthesis of Properties +--------------------------------------- + +Clang provides support for autosynthesis of declared properties. Using this +feature, clang provides default synthesis of those properties not declared +@dynamic and not having user provided backing getter and setter methods. +``__has_feature(objc_default_synthesize_properties)`` checks for availability +of this feature in version of clang being used. + +.. _langext-objc_method_family: + +The ``objc_method_family`` attribute +------------------------------------ + +Many methods in Objective-C have conventional meanings determined by their +selectors. It is sometimes useful to be able to mark a method as having a +particular conventional meaning despite not having the right selector, or as +not having the conventional meaning that its selector would suggest. For these +use cases, we provide an attribute to specifically describe the "method family" +that a method belongs to. + +**Usage**: ``__attribute__((objc_method_family(X)))``, where ``X`` is one of +``none``, ``alloc``, ``copy``, ``init``, ``mutableCopy``, or ``new``. This +attribute can only be placed at the end of a method declaration: + +.. 
code-block:: objc + + - (NSString *)initMyStringValue __attribute__((objc_method_family(none))); + +Users who do not wish to change the conventional meaning of a method, and who +merely want to document its non-standard retain and release semantics, should +use the :ref:`retaining behavior attributes <langext-objc-retain-release>` +described below. + +Query for this feature with ``__has_attribute(objc_method_family)``. + +.. _langext-objc-retain-release: + +Objective-C retaining behavior attributes +----------------------------------------- + +In Objective-C, functions and methods are generally assumed to follow the +`Cocoa Memory Management +<http://developer.apple.com/library/mac/#documentation/Cocoa/Conceptual/MemoryMgmt/Articles/mmRules.html>`_ +conventions for ownership of object arguments and +return values. However, there are exceptions, and so Clang provides attributes +to allow these exceptions to be documented. This are used by ARC and the +`static analyzer <http://clang-analyzer.llvm.org>`_ Some exceptions may be +better described using the :ref:`objc_method_family +<langext-objc_method_family>` attribute instead. + +**Usage**: The ``ns_returns_retained``, ``ns_returns_not_retained``, +``ns_returns_autoreleased``, ``cf_returns_retained``, and +``cf_returns_not_retained`` attributes can be placed on methods and functions +that return Objective-C or CoreFoundation objects. They are commonly placed at +the end of a function prototype or method declaration: + +.. code-block:: objc + + id foo() __attribute__((ns_returns_retained)); + + - (NSString *)bar:(int)x __attribute__((ns_returns_retained)); + +The ``*_returns_retained`` attributes specify that the returned object has a +1 +retain count. The ``*_returns_not_retained`` attributes specify that the return +object has a +0 retain count, even if the normal convention for its selector +would be +1. 
``ns_returns_autoreleased`` specifies that the returned object is ++0, but is guaranteed to live at least as long as the next flush of an +autorelease pool. + +**Usage**: The ``ns_consumed`` and ``cf_consumed`` attributes can be placed on +an parameter declaration; they specify that the argument is expected to have a ++1 retain count, which will be balanced in some way by the function or method. +The ``ns_consumes_self`` attribute can only be placed on an Objective-C +method; it specifies that the method expects its ``self`` parameter to have a ++1 retain count, which it will balance in some way. + +.. code-block:: objc + + void foo(__attribute__((ns_consumed)) NSString *string); + + - (void) bar __attribute__((ns_consumes_self)); + - (void) baz:(id) __attribute__((ns_consumed)) x; + +Further examples of these attributes are available in the static analyzer's `list of annotations for analysis +<http://clang-analyzer.llvm.org/annotations.html#cocoa_mem>`_. + +Query for these features with ``__has_attribute(ns_consumed)``, +``__has_attribute(ns_returns_retained)``, etc. + + +Function Overloading in C +========================= + +Clang provides support for C++ function overloading in C. Function overloading +in C is introduced using the ``overloadable`` attribute. For example, one +might provide several overloaded versions of a ``tgsin`` function that invokes +the appropriate standard function computing the sine of a value with ``float``, +``double``, or ``long double`` precision: + +.. code-block:: c + + #include <math.h> + float __attribute__((overloadable)) tgsin(float x) { return sinf(x); } + double __attribute__((overloadable)) tgsin(double x) { return sin(x); } + long double __attribute__((overloadable)) tgsin(long double x) { return sinl(x); } + +Given these declarations, one can call ``tgsin`` with a ``float`` value to +receive a ``float`` result, with a ``double`` to receive a ``double`` result, +etc. 
Function overloading in C follows the rules of C++ function overloading +to pick the best overload given the call arguments, with a few C-specific +semantics: + +* Conversion from ``float`` or ``double`` to ``long double`` is ranked as a + floating-point promotion (per C99) rather than as a floating-point conversion + (as in C++). + +* A conversion from a pointer of type ``T*`` to a pointer of type ``U*`` is + considered a pointer conversion (with conversion rank) if ``T`` and ``U`` are + compatible types. + +* A conversion from type ``T`` to a value of type ``U`` is permitted if ``T`` + and ``U`` are compatible types. This conversion is given "conversion" rank. + +The declaration of ``overloadable`` functions is restricted to function +declarations and definitions. Most importantly, if any function with a given +name is given the ``overloadable`` attribute, then all function declarations +and definitions with that name (and in that scope) must have the +``overloadable`` attribute. This rule even applies to redeclarations of +functions whose original declaration had the ``overloadable`` attribute, e.g., + +.. code-block:: c + + int f(int) __attribute__((overloadable)); + float f(float); // error: declaration of "f" must have the "overloadable" attribute + + int g(int) __attribute__((overloadable)); + int g(int) { } // error: redeclaration of "g" must also have the "overloadable" attribute + +Functions marked ``overloadable`` must have prototypes. Therefore, the +following code is ill-formed: + +.. code-block:: c + + int h() __attribute__((overloadable)); // error: h does not have a prototype + +However, ``overloadable`` functions are allowed to use a ellipsis even if there +are no named parameters (as is permitted in C++). This feature is particularly +useful when combined with the ``unavailable`` attribute: + +.. code-block:: c++ + + void honeypot(...) 
__attribute__((overloadable, unavailable)); // calling me is an error + +Functions declared with the ``overloadable`` attribute have their names mangled +according to the same rules as C++ function names. For example, the three +``tgsin`` functions in our motivating example get the mangled names +``_Z5tgsinf``, ``_Z5tgsind``, and ``_Z5tgsine``, respectively. There are two +caveats to this use of name mangling: + +* Future versions of Clang may change the name mangling of functions overloaded + in C, so you should not depend on an specific mangling. To be completely + safe, we strongly urge the use of ``static inline`` with ``overloadable`` + functions. + +* The ``overloadable`` attribute has almost no meaning when used in C++, + because names will already be mangled and functions are already overloadable. + However, when an ``overloadable`` function occurs within an ``extern "C"`` + linkage specification, it's name *will* be mangled in the same way as it + would in C. + +Query for this feature with ``__has_extension(attribute_overloadable)``. + +Initializer lists for complex numbers in C +========================================== + +clang supports an extension which allows the following in C: + +.. code-block:: c++ + + #include <math.h> + #include <complex.h> + complex float x = { 1.0f, INFINITY }; // Init to (1, Inf) + +This construct is useful because there is no way to separately initialize the +real and imaginary parts of a complex variable in standard C, given that clang +does not support ``_Imaginary``. (Clang also supports the ``__real__`` and +``__imag__`` extensions from gcc, which help in some cases, but are not usable +in static initializers.) + +Note that this extension does not allow eliding the braces; the meaning of the +following two lines is different: + +.. 
code-block:: c++ + + complex float x[] = { { 1.0f, 1.0f } }; // [0] = (1, 1) + complex float x[] = { 1.0f, 1.0f }; // [0] = (1, 0), [1] = (1, 0) + +This extension also works in C++ mode, as far as that goes, but does not apply +to the C++ ``std::complex``. (In C++11, list initialization allows the same +syntax to be used with ``std::complex`` with the same meaning.) + +Builtin Functions +================= + +Clang supports a number of builtin library functions with the same syntax as +GCC, including things like ``__builtin_nan``, ``__builtin_constant_p``, +``__builtin_choose_expr``, ``__builtin_types_compatible_p``, +``__sync_fetch_and_add``, etc. In addition to the GCC builtins, Clang supports +a number of builtins that GCC does not, which are listed here. + +Please note that Clang does not and will not support all of the GCC builtins +for vector operations. Instead of using builtins, you should use the functions +defined in target-specific header files like ``<xmmintrin.h>``, which define +portable wrappers for these. Many of the Clang versions of these functions are +implemented directly in terms of :ref:`extended vector support +<langext-vectors>` instead of builtins, in order to reduce the number of +builtins that we need to implement. + +``__builtin_readcyclecounter`` +------------------------------ + +``__builtin_readcyclecounter`` is used to access the cycle counter register (or +a similar low-latency, high-accuracy clock) on those targets that support it. + +**Syntax**: + +.. code-block:: c++ + + __builtin_readcyclecounter() + +**Example of Use**: + +.. code-block:: c++ + + unsigned long long t0 = __builtin_readcyclecounter(); + do_something(); + unsigned long long t1 = __builtin_readcyclecounter(); + unsigned long long cycles_to_do_something = t1 - t0; // assuming no overflow + +**Description**: + +The ``__builtin_readcyclecounter()`` builtin returns the cycle counter value, +which may be either global or process/thread-specific depending on the target. 
+As the backing counters often overflow quickly (on the order of seconds) this +should only be used for timing small intervals. When not supported by the +target, the return value is always zero. This builtin takes no arguments and +produces an unsigned long long result. + +Query for this feature with ``__has_builtin(__builtin_readcyclecounter)``. + +.. _langext-__builtin_shufflevector: + +``__builtin_shufflevector`` +--------------------------- + +``__builtin_shufflevector`` is used to express generic vector +permutation/shuffle/swizzle operations. This builtin is also very important +for the implementation of various target-specific header files like +``<xmmintrin.h>``. + +**Syntax**: + +.. code-block:: c++ + + __builtin_shufflevector(vec1, vec2, index1, index2, ...) + +**Examples**: + +.. code-block:: c++ + + // Identity operation - return 4-element vector V1. + __builtin_shufflevector(V1, V1, 0, 1, 2, 3) + + // "Splat" element 0 of V1 into a 4-element result. + __builtin_shufflevector(V1, V1, 0, 0, 0, 0) + + // Reverse 4-element vector V1. + __builtin_shufflevector(V1, V1, 3, 2, 1, 0) + + // Concatenate every other element of 4-element vectors V1 and V2. + __builtin_shufflevector(V1, V2, 0, 2, 4, 6) + + // Concatenate every other element of 8-element vectors V1 and V2. + __builtin_shufflevector(V1, V2, 0, 2, 4, 6, 8, 10, 12, 14) + +**Description**: + +The first two arguments to ``__builtin_shufflevector`` are vectors that have +the same element type. The remaining arguments are a list of integers that +specify the element indices of the first two vectors that should be extracted +and returned in a new vector. These element indices are numbered sequentially +starting with the first vector, continuing into the second vector. Thus, if +``vec1`` is a 4-element vector, index 5 would refer to the second element of +``vec2``. 
+ +The result of ``__builtin_shufflevector`` is a vector with the same element +type as ``vec1``/``vec2`` but that has an element count equal to the number of +indices specified. + +Query for this feature with ``__has_builtin(__builtin_shufflevector)``. + +``__builtin_unreachable`` +------------------------- + +``__builtin_unreachable`` is used to indicate that a specific point in the +program cannot be reached, even if the compiler might otherwise think it can. +This is useful to improve optimization and eliminates certain warnings. For +example, without the ``__builtin_unreachable`` in the example below, the +compiler assumes that the inline asm can fall through and prints a "function +declared '``noreturn``' should not return" warning. + +**Syntax**: + +.. code-block:: c++ + + __builtin_unreachable() + +**Example of use**: + +.. code-block:: c++ + + void myabort(void) __attribute__((noreturn)); + void myabort(void) { + asm("int3"); + __builtin_unreachable(); + } + +**Description**: + +The ``__builtin_unreachable()`` builtin has completely undefined behavior. +Since it has undefined behavior, it is a statement that it is never reached and +the optimizer can take advantage of this to produce better code. This builtin +takes no arguments and produces a void result. + +Query for this feature with ``__has_builtin(__builtin_unreachable)``. + +``__sync_swap`` +--------------- + +``__sync_swap`` is used to atomically swap integers or pointers in memory. + +**Syntax**: + +.. code-block:: c++ + + type __sync_swap(type *ptr, type value, ...) + +**Example of Use**: + +.. code-block:: c++ + + int old_value = __sync_swap(&value, new_value); + +**Description**: + +The ``__sync_swap()`` builtin extends the existing ``__sync_*()`` family of +atomic intrinsics to allow code to atomically swap the current value with the +new value. 
More importantly, it helps developers write more efficient and +correct code by avoiding expensive loops around +``__sync_bool_compare_and_swap()`` or relying on the platform-specific +implementation details of ``__sync_lock_test_and_set()``. The +``__sync_swap()`` builtin is a full barrier. + +Multiprecision Arithmetic Builtins +---------------------------------- + +Clang provides a set of builtins which expose multiprecision arithmetic in a +manner amenable to C. They all have the following form: + +.. code-block:: c + + unsigned x = ..., y = ..., carryin = ..., carryout; + unsigned sum = __builtin_addc(x, y, carryin, &carryout); + +Thus one can form a multiprecision addition chain in the following manner: + +.. code-block:: c + + unsigned *x, *y, *z, carryin=0, carryout; + z[0] = __builtin_addc(x[0], y[0], carryin, &carryout); + carryin = carryout; + z[1] = __builtin_addc(x[1], y[1], carryin, &carryout); + carryin = carryout; + z[2] = __builtin_addc(x[2], y[2], carryin, &carryout); + carryin = carryout; + z[3] = __builtin_addc(x[3], y[3], carryin, &carryout); + +The complete list of builtins is: + +.. 
code-block:: c + + unsigned short __builtin_addcs (unsigned short x, unsigned short y, unsigned short carryin, unsigned short *carryout); + unsigned __builtin_addc (unsigned x, unsigned y, unsigned carryin, unsigned *carryout); + unsigned long __builtin_addcl (unsigned long x, unsigned long y, unsigned long carryin, unsigned long *carryout); + unsigned long long __builtin_addcll(unsigned long long x, unsigned long long y, unsigned long long carryin, unsigned long long *carryout); + unsigned short __builtin_subcs (unsigned short x, unsigned short y, unsigned short carryin, unsigned short *carryout); + unsigned __builtin_subc (unsigned x, unsigned y, unsigned carryin, unsigned *carryout); + unsigned long __builtin_subcl (unsigned long x, unsigned long y, unsigned long carryin, unsigned long *carryout); + unsigned long long __builtin_subcll(unsigned long long x, unsigned long long y, unsigned long long carryin, unsigned long long *carryout); + +.. _langext-__c11_atomic: + +__c11_atomic builtins +--------------------- + +Clang provides a set of builtins which are intended to be used to implement +C11's ``<stdatomic.h>`` header. These builtins provide the semantics of the +``_explicit`` form of the corresponding C11 operation, and are named with a +``__c11_`` prefix. The supported operations are: + +* ``__c11_atomic_init`` +* ``__c11_atomic_thread_fence`` +* ``__c11_atomic_signal_fence`` +* ``__c11_atomic_is_lock_free`` +* ``__c11_atomic_store`` +* ``__c11_atomic_load`` +* ``__c11_atomic_exchange`` +* ``__c11_atomic_compare_exchange_strong`` +* ``__c11_atomic_compare_exchange_weak`` +* ``__c11_atomic_fetch_add`` +* ``__c11_atomic_fetch_sub`` +* ``__c11_atomic_fetch_and`` +* ``__c11_atomic_fetch_or`` +* ``__c11_atomic_fetch_xor`` + +Non-standard C++11 Attributes +============================= + +Clang's non-standard C++11 attributes live in the ``clang`` attribute +namespace. 
+ +The ``clang::fallthrough`` attribute +------------------------------------ + +The ``clang::fallthrough`` attribute is used along with the +``-Wimplicit-fallthrough`` argument to annotate intentional fall-through +between switch labels. It can only be applied to a null statement placed at a +point of execution between any statement and the next switch label. It is +common to mark these places with a specific comment, but this attribute is +meant to replace comments with a more strict annotation, which can be checked +by the compiler. This attribute doesn't change semantics of the code and can +be used wherever an intended fall-through occurs. It is designed to mimic +control-flow statements like ``break;``, so it can be placed in most places +where ``break;`` can, but only if there are no statements on the execution path +between it and the next switch label. + +Here is an example: + +.. code-block:: c++ + + // compile with -Wimplicit-fallthrough + switch (n) { + case 22: + case 33: // no warning: no statements between case labels + f(); + case 44: // warning: unannotated fall-through + g(); + [[clang::fallthrough]]; + case 55: // no warning + if (x) { + h(); + break; + } + else { + i(); + [[clang::fallthrough]]; + } + case 66: // no warning + p(); + [[clang::fallthrough]]; // warning: fallthrough annotation does not + // directly precede case label + q(); + case 77: // warning: unannotated fall-through + r(); + } + +``gnu::`` attributes +-------------------- + +Clang also supports GCC's ``gnu`` attribute namespace. All GCC attributes which +are accepted with the ``__attribute__((foo))`` syntax are also accepted as +``[[gnu::foo]]``. 
This only extends to attributes which are specified by GCC +(see the list of `GCC function attributes +<http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html>`_, `GCC variable +attributes <http://gcc.gnu.org/onlinedocs/gcc/Variable-Attributes.html>`_, and +`GCC type attributes +<http://gcc.gnu.org/onlinedocs/gcc/Type-Attributes.html>`_). As with the GCC +implementation, these attributes must appertain to the *declarator-id* in a +declaration, which means they must go either at the start of the declaration or +immediately after the name being declared. + +For example, this applies the GNU ``unused`` attribute to ``a`` and ``f``, and +also applies the GNU ``noreturn`` attribute to ``f``. + +.. code-block:: c++ + + [[gnu::unused]] int a, f [[gnu::noreturn]] (); + +Target-Specific Extensions +========================== + +Clang supports some language features conditionally on some targets. + +X86/X86-64 Language Extensions +------------------------------ + +The X86 backend has these language extensions: + +Memory references off the GS segment +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Annotating a pointer with address space #256 causes it to be code generated +relative to the X86 GS segment register, and address space #257 causes it to be +relative to the X86 FS segment. Note that this is a very very low-level +feature that should only be used if you know what you're doing (for example in +an OS kernel). + +Here is an example: + +.. code-block:: c++ + + #define GS_RELATIVE __attribute__((address_space(256))) + int foo(int GS_RELATIVE *P) { + return *P; + } + +Which compiles to (on X86-32): + +.. code-block:: gas + + _foo: + movl 4(%esp), %eax + movl %gs:(%eax), %eax + ret + +Extensions for Static Analysis +============================== + +Clang supports additional attributes that are useful for documenting program +invariants and rules for static analysis tools, such as the `Clang Static +Analyzer <http://clang-analyzer.llvm.org/>`_. 
These attributes are documented +in the analyzer's `list of source-level annotations +<http://clang-analyzer.llvm.org/annotations.html>`_. + + +Extensions for Dynamic Analysis +=============================== + +.. _langext-address_sanitizer: + +AddressSanitizer +---------------- + +Use ``__has_feature(address_sanitizer)`` to check if the code is being built +with :doc:`AddressSanitizer`. + +Use ``__attribute__((no_sanitize_address))`` +on a function declaration +to specify that address safety instrumentation (e.g. AddressSanitizer) should +not be applied to that function. + +.. _langext-thread_sanitizer: + +ThreadSanitizer +---------------- + +Use ``__has_feature(thread_sanitizer)`` to check if the code is being built +with :doc:`ThreadSanitizer`. + +Use ``__attribute__((no_sanitize_thread))`` on a function declaration +to specify that checks for data races on plain (non-atomic) memory accesses +should not be inserted by ThreadSanitizer. +The function may still be instrumented by the tool +to avoid false positives in other places. + +.. _langext-memory_sanitizer: + +MemorySanitizer +---------------- +Use ``__has_feature(memory_sanitizer)`` to check if the code is being built +with :doc:`MemorySanitizer`. + +Use ``__attribute__((no_sanitize_memory))`` on a function declaration +to specify that checks for uninitialized memory should not be inserted +(e.g. by MemorySanitizer). The function may still be instrumented by the tool +to avoid false positives in other places. + + +Thread-Safety Annotation Checking +================================= + +Clang supports additional attributes for checking basic locking policies in +multithreaded programs. Clang currently parses the following list of +attributes, although **the implementation for these annotations is currently in +development.** For more details, see the `GCC implementation +<http://gcc.gnu.org/wiki/ThreadSafetyAnnotation>`_. 
+ +``no_thread_safety_analysis`` +----------------------------- + +Use ``__attribute__((no_thread_safety_analysis))`` on a function declaration to +specify that the thread safety analysis should not be run on that function. +This attribute provides an escape hatch (e.g. for situations when it is +difficult to annotate the locking policy). + +``lockable`` +------------ + +Use ``__attribute__((lockable))`` on a class definition to specify that it has +a lockable type (e.g. a Mutex class). This annotation is primarily used to +check consistency. + +``scoped_lockable`` +------------------- + +Use ``__attribute__((scoped_lockable))`` on a class definition to specify that +it has a "scoped" lockable type. Objects of this type will acquire the lock +upon construction and release it upon going out of scope. This annotation is +primarily used to check consistency. + +``guarded_var`` +--------------- + +Use ``__attribute__((guarded_var))`` on a variable declaration to specify that +the variable must be accessed while holding some lock. + +``pt_guarded_var`` +------------------ + +Use ``__attribute__((pt_guarded_var))`` on a pointer declaration to specify +that the pointer must be dereferenced while holding some lock. + +``guarded_by(l)`` +----------------- + +Use ``__attribute__((guarded_by(l)))`` on a variable declaration to specify +that the variable must be accessed while holding lock ``l``. + +``pt_guarded_by(l)`` +-------------------- + +Use ``__attribute__((pt_guarded_by(l)))`` on a pointer declaration to specify +that the pointer must be dereferenced while holding lock ``l``. + +``acquired_before(...)`` +------------------------ + +Use ``__attribute__((acquired_before(...)))`` on a declaration of a lockable +variable to specify that the lock must be acquired before all attribute +arguments. Arguments must be lockable type, and there must be at least one +argument. 
+ +``acquired_after(...)`` +----------------------- + +Use ``__attribute__((acquired_after(...)))`` on a declaration of a lockable +variable to specify that the lock must be acquired after all attribute +arguments. Arguments must be lockable type, and there must be at least one +argument. + +``exclusive_lock_function(...)`` +-------------------------------- + +Use ``__attribute__((exclusive_lock_function(...)))`` on a function declaration +to specify that the function acquires all listed locks exclusively. This +attribute takes zero or more arguments: either of lockable type or integers +indexing into function parameters of lockable type. If no arguments are given, +the acquired lock is implicitly ``this`` of the enclosing object. + +``shared_lock_function(...)`` +----------------------------- + +Use ``__attribute__((shared_lock_function(...)))`` on a function declaration to +specify that the function acquires all listed locks, although the locks may be +shared (e.g. read locks). This attribute takes zero or more arguments: either +of lockable type or integers indexing into function parameters of lockable +type. If no arguments are given, the acquired lock is implicitly ``this`` of +the enclosing object. + +``exclusive_trylock_function(...)`` +----------------------------------- + +Use ``__attribute__((exclusive_trylock_function(...)))`` on a function declaration +to specify that the function will try (without blocking) to acquire all listed +locks exclusively. This attribute takes one or more arguments. The first +argument is an integer or boolean value specifying the return value of a +successful lock acquisition. The remaining arguments are either of lockable +type or integers indexing into function parameters of lockable type. If only +one argument is given, the acquired lock is implicitly ``this`` of the +enclosing object. 
+ +``shared_trylock_function(...)`` +-------------------------------- + +Use ``__attribute__((shared_trylock_function(...)))`` on a function declaration to +specify that the function will try (without blocking) to acquire all listed +locks, although the locks may be shared (e.g. read locks). This attribute +takes one or more arguments. The first argument is an integer or boolean value +specifying the return value of a successful lock acquisition. The remaining +arguments are either of lockable type or integers indexing into function +parameters of lockable type. If only one argument is given, the acquired lock +is implicitly ``this`` of the enclosing object. + +``unlock_function(...)`` +------------------------ + +Use ``__attribute__((unlock_function(...)))`` on a function declaration to +specify that the function releases all listed locks. This attribute takes zero +or more arguments: either of lockable type or integers indexing into function +parameters of lockable type. If no arguments are given, the released lock is +implicitly ``this`` of the enclosing object. + +``lock_returned(l)`` +-------------------- + +Use ``__attribute__((lock_returned(l)))`` on a function declaration to specify +that the function returns lock ``l`` (``l`` must be of lockable type). This +annotation is used to aid in resolving lock expressions. + +``locks_excluded(...)`` +----------------------- + +Use ``__attribute__((locks_excluded(...)))`` on a function declaration to +specify that the function must not be called with the listed locks. Arguments +must be lockable type, and there must be at least one argument. + +``exclusive_locks_required(...)`` +--------------------------------- + +Use ``__attribute__((exclusive_locks_required(...)))`` on a function +declaration to specify that the function must be called while holding the +listed exclusive locks. Arguments must be lockable type, and there must be at +least one argument. 
+ +``shared_locks_required(...)`` +------------------------------ + +Use ``__attribute__((shared_locks_required(...)))`` on a function declaration +to specify that the function must be called while holding the listed shared +locks. Arguments must be lockable type, and there must be at least one +argument. + +Type Safety Checking +==================== + +Clang supports additional attributes to enable checking type safety properties +that can't be enforced by the C type system. Use cases include: + +* MPI library implementations, where these attributes enable checking that + buffer type matches the passed ``MPI_Datatype``; +* for HDF5 library there is a similar use case to MPI; +* checking types of variadic functions' arguments for functions like + ``fcntl()`` and ``ioctl()``. + +You can detect support for these attributes with ``__has_attribute()``. For +example: + +.. code-block:: c++ + + #if defined(__has_attribute) + # if __has_attribute(argument_with_type_tag) && \ + __has_attribute(pointer_with_type_tag) && \ + __has_attribute(type_tag_for_datatype) + # define ATTR_MPI_PWT(buffer_idx, type_idx) __attribute__((pointer_with_type_tag(mpi,buffer_idx,type_idx))) + /* ... other macros ... */ + # endif + #endif + + #if !defined(ATTR_MPI_PWT) + # define ATTR_MPI_PWT(buffer_idx, type_idx) + #endif + + int MPI_Send(void *buf, int count, MPI_Datatype datatype /*, other args omitted */) + ATTR_MPI_PWT(1,3); + +``argument_with_type_tag(...)`` +------------------------------- + +Use ``__attribute__((argument_with_type_tag(arg_kind, arg_idx, +type_tag_idx)))`` on a function declaration to specify that the function +accepts a type tag that determines the type of some other argument. +``arg_kind`` is an identifier that should be used when annotating all +applicable type tags. + +This attribute is primarily useful for checking arguments of variadic functions +(``pointer_with_type_tag`` can be used in most of non-variadic cases). + +For example: + +.. 
code-block:: c++ + + int fcntl(int fd, int cmd, ...) + __attribute__(( argument_with_type_tag(fcntl,3,2) )); + +``pointer_with_type_tag(...)`` +------------------------------ + +Use ``__attribute__((pointer_with_type_tag(ptr_kind, ptr_idx, type_tag_idx)))`` +on a function declaration to specify that the function accepts a type tag that +determines the pointee type of some other pointer argument. + +For example: + +.. code-block:: c++ + + int MPI_Send(void *buf, int count, MPI_Datatype datatype /*, other args omitted */) + __attribute__(( pointer_with_type_tag(mpi,1,3) )); + +``type_tag_for_datatype(...)`` +------------------------------ + +Clang supports annotating type tags of two forms. + +* **Type tag that is an expression containing a reference to some declared + identifier.** Use ``__attribute__((type_tag_for_datatype(kind, type)))`` on a + declaration with that identifier: + + .. code-block:: c++ + + extern struct mpi_datatype mpi_datatype_int + __attribute__(( type_tag_for_datatype(mpi,int) )); + #define MPI_INT ((MPI_Datatype) &mpi_datatype_int) + +* **Type tag that is an integral literal.** Introduce a ``static const`` + variable with a corresponding initializer value and attach + ``__attribute__((type_tag_for_datatype(kind, type)))`` on that declaration, + for example: + + .. code-block:: c++ + + #define MPI_INT ((MPI_Datatype) 42) + static const MPI_Datatype mpi_datatype_int + __attribute__(( type_tag_for_datatype(mpi,int) )) = 42; + +The attribute also accepts an optional third argument that determines how the +expression is compared to the type tag. There are two supported flags: + +* ``layout_compatible`` will cause types to be compared according to + layout-compatibility rules (C++11 [class.mem] p 17, 18). This is + implemented to support annotating types like ``MPI_DOUBLE_INT``. + + For example: + + .. 
code-block:: c++ + + /* In mpi.h */ + struct internal_mpi_double_int { double d; int i; }; + extern struct mpi_datatype mpi_datatype_double_int + __attribute__(( type_tag_for_datatype(mpi, struct internal_mpi_double_int, layout_compatible) )); + + #define MPI_DOUBLE_INT ((MPI_Datatype) &mpi_datatype_double_int) + + /* In user code */ + struct my_pair { double a; int b; }; + struct my_pair *buffer; + MPI_Send(buffer, 1, MPI_DOUBLE_INT /*, ... */); // no warning + + struct my_int_pair { int a; int b; }; + struct my_int_pair *buffer2; + MPI_Send(buffer2, 1, MPI_DOUBLE_INT /*, ... */); // warning: actual buffer element + // type 'struct my_int_pair' + // doesn't match specified MPI_Datatype + +* ``must_be_null`` specifies that the expression should be a null pointer + constant, for example: + + .. code-block:: c++ + + /* In mpi.h */ + extern struct mpi_datatype mpi_datatype_null + __attribute__(( type_tag_for_datatype(mpi, void, must_be_null) )); + + #define MPI_DATATYPE_NULL ((MPI_Datatype) &mpi_datatype_null) + + /* In user code */ + MPI_Send(buffer, 1, MPI_DATATYPE_NULL /*, ... */); // warning: MPI_DATATYPE_NULL + // was specified but buffer + // is not a null pointer + +Format String Checking +====================== + +Clang supports the ``format`` attribute, which indicates that the function +accepts a ``printf`` or ``scanf``-like format string and corresponding +arguments or a ``va_list`` that contains these arguments. + +Please see `GCC documentation about format attribute +<http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html>`_ to find details +about attribute syntax. + +Clang implements two kinds of checks with this attribute. + +#. Clang checks that the function with the ``format`` attribute is called with + a format string that uses format specifiers that are allowed, and that + arguments match the format string. This is the ``-Wformat`` warning, it is + on by default. + +#. Clang checks that the format string argument is a literal string. 
This is + the ``-Wformat-nonliteral`` warning, it is off by default. + + Clang implements this mostly the same way as GCC, but there is a difference + for functions that accept a ``va_list`` argument (for example, ``vprintf``). + GCC does not emit ``-Wformat-nonliteral`` warning for calls to such + functions. Clang does not warn if the format string comes from a function + parameter, where the function is annotated with a compatible attribute, + otherwise it warns. For example: + + .. code-block:: c + + __attribute__((__format__ (__scanf__, 1, 3))) + void foo(const char* s, char *buf, ...) { + va_list ap; + va_start(ap, buf); + + vprintf(s, ap); // warning: format string is not a string literal + } + + In this case we warn because ``s`` contains a format string for a + ``scanf``-like function, but it is passed to a ``printf``-like function. + + If the attribute is removed, clang still warns, because the format string is + not a string literal. + + Another example: + + .. code-block:: c + + __attribute__((__format__ (__printf__, 1, 3))) + void foo(const char* s, char *buf, ...) { + va_list ap; + va_start(ap, buf); + + vprintf(s, ap); // no warning + } + + In this case Clang does not warn because the format string ``s`` and + the corresponding arguments are annotated. If the arguments are + incorrect, the caller of ``foo`` will receive a warning. 
diff --git a/docs/LibASTMatchers.html b/docs/LibASTMatchers.html deleted file mode 100644 index 8142c19..0000000 --- a/docs/LibASTMatchers.html +++ /dev/null @@ -1,130 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> -<title>Matching the Clang AST</title> -<link type="text/css" rel="stylesheet" href="../menu.css" /> -<link type="text/css" rel="stylesheet" href="../content.css" /> -</head> -<body> - -<!--#include virtual="../menu.html.incl"--> - -<div id="content"> - -<h1>Matching the Clang AST</h1> -<p>This document explains how to use Clang's LibASTMatchers to match interesting -nodes of the AST and execute code that uses the matched nodes. Combined with -<a href="LibTooling.html">LibTooling</a>, LibASTMatchers helps to write -code-to-code transformation tools or query tools.</p> - -<p>We assume basic knowledge about the Clang AST. See the -<a href="IntroductionToTheClangAST.html">Introduction to the Clang AST</a> if -you want to learn more about how the AST is structured.</p> - -<!-- FIXME: create tutorial and link to the tutorial --> - -<!-- ======================================================================= --> -<h2 id="intro">Introduction</h2> -<!-- ======================================================================= --> - -<p>LibASTMatchers provides a domain specific language to create predicates on Clang's -AST. This DSL is written in and can be used from C++, allowing users to write -a single program to both match AST nodes and access the node's C++ interface -to extract attributes, source locations, or any other information provided on -the AST level.</p> - -<p>AST matchers are predicates on nodes in the AST. 
Matchers are created -by calling creator functions that allow building up a tree of matchers, where -inner matchers are used to make the match more specific.</p> - -</p>For example, to create a matcher that matches all class or union declarations -in the AST of a translation unit, you can call -<a href="LibASTMatchersReference.html#recordDecl0Anchor">recordDecl()</a>. -To narrow the match down, for example to find all class or union declarations with the name "Foo", -insert a <a href="LibASTMatchersReference.html#hasName0Anchor">hasName</a> -matcher: the call recordDecl(hasName("Foo")) returns a matcher that matches classes -or unions that are named "Foo", in any namespace. By default, matchers that accept -multiple inner matchers use an implicit <a href="LibASTMatchersReference.html#allOf0Anchor">allOf()</a>. -This allows further narrowing down the match, for example to match all classes -that are derived from "Bar": recordDecl(hasName("Foo"), isDerivedFrom("Bar")).</p> - -<!-- ======================================================================= --> -<h2 id="writing">How to create a matcher</h2> -<!-- ======================================================================= --> - -<p>With more than a thousand classes in the Clang AST, one can quickly get lost -when trying to figure out how to create a matcher for a specific pattern. This -section will teach you how to use a rigorous step-by-step pattern to build the -matcher you are interested in. Note that there will always be matchers missing -for some part of the AST. See the section about <a href="#writing">how to write -your own AST matchers</a> later in this document.</p> - -<p>The precondition to using the matchers is to understand how the AST -for what you want to match looks like. 
The <a href="IntroductionToTheClangAST.html">Introduction to the Clang AST</a> -teaches you how to dump a translation unit's AST into a human readable format.</p> - -<!-- FIXME: Introduce link to ASTMatchersTutorial.html --> -<!-- FIXME: Introduce link to ASTMatchersCookbook.html --> - -<p>In general, the strategy to create the right matchers is:</p> -<ol> -<li>Find the outermost class in Clang's AST you want to match.</li> -<li>Look at the <a href="LibASTMatchersReference.html">AST Matcher Reference</a> for matchers that either match the -node you're interested in or narrow down attributes on the node.</li> -<li>Create your outer match expression. Verify that it works as expected.</li> -<li>Examine the matchers for what the next inner node you want to match is.</li> -<li>Repeat until the matcher is finished.</li> -</ol> - -<!-- ======================================================================= --> -<h2 id="binding">Binding nodes in match expressions</h2> -<!-- ======================================================================= --> - -<p>Matcher expressions allow you to specify which parts of the AST are interesting -for a certain task. 
Often you will want to then do something with the nodes -that were matched, like building source code transformations.</p> - -<p>To that end, matchers that match specific AST nodes (so called node matchers) -are bindable; for example, recordDecl(hasName("MyClass")).bind("id") will bind -the matched recordDecl node to the string "id", to be later retrieved in the -<a href="http://clang.llvm.org/doxygen/classclang_1_1ast__matchers_1_1MatchFinder_1_1MatchCallback.html">match callback</a>.</p> - -<!-- FIXME: Introduce link to ASTMatchersTutorial.html --> -<!-- FIXME: Introduce link to ASTMatchersCookbook.html --> - -<!-- ======================================================================= --> -<h2 id="writing">Writing your own matchers</h2> -<!-- ======================================================================= --> - -<p>There are multiple different ways to define a matcher, depending on its -type and flexibility.</p> -<ul> -<li><b>VariadicDynCastAllOfMatcher<Base, Derived></b><p>Those match all nodes -of type <i>Base</i> if they can be dynamically casted to <i>Derived</i>. The -names of those matchers are nouns, which closely resemble <i>Derived</i>. -VariadicDynCastAllOfMatchers are the backbone of the matcher hierarchy. Most -often, your match expression will start with one of them, and you can -<a href="#binding">bind</a> the node they represent to ids for later processing.</p> -<p>VariadicDynCastAllOfMatchers are callable classes that model variadic -template functions in C++03. They take an aribtrary number of Matcher<Derived> -and return a Matcher<Base>.</p></li> -<li><b>AST_MATCHER_P(Type, Name, ParamType, Param)</b><p> Most matcher definitions -use the matcher creation macros. 
Those define both the matcher of type Matcher<Type> -itself, and a matcher-creation function named <i>Name</i> that takes a parameter -of type <i>ParamType</i> and returns the corresponding matcher.</p> -<p>There are multiple matcher definition macros that deal with polymorphic return -values and different parameter counts. See <a href="http://clang.llvm.org/doxygen/ASTMatchersMacros_8h.html">ASTMatchersMacros.h</a>. -</p></li> -<li><b>Matcher creation functions</b><p>Matchers are generated by nesting -calls to matcher creation functions. Most of the time those functions are either -created by using VariadicDynCastAllOfMatcher or the matcher creation macros -(see below). The free-standing functions are an indication that this matcher -is just a combination of other matchers, as is for example the case with -<a href="LibASTMatchersReference.html#callee1Anchor">callee</a>.</p></li> -</ul> - -</div> -</body> -</html> - diff --git a/docs/LibASTMatchers.rst b/docs/LibASTMatchers.rst new file mode 100644 index 0000000..738de79 --- /dev/null +++ b/docs/LibASTMatchers.rst @@ -0,0 +1,134 @@ +====================== +Matching the Clang AST +====================== + +This document explains how to use Clang's LibASTMatchers to match interesting +nodes of the AST and execute code that uses the matched nodes. Combined with +:doc:`LibTooling`, LibASTMatchers helps to write code-to-code transformation +tools or query tools. + +We assume basic knowledge about the Clang AST. See the :doc:`Introduction +to the Clang AST <IntroductionToTheClangAST>` if you want to learn more +about how the AST is structured. + +.. FIXME: create tutorial and link to the tutorial + +Introduction +------------ + +LibASTMatchers provides a domain specific language to create predicates on +Clang's AST. 
This DSL is written in and can be used from C++, allowing users +to write a single program to both match AST nodes and access the node's C++ +interface to extract attributes, source locations, or any other information +provided on the AST level. + +AST matchers are predicates on nodes in the AST. Matchers are created by +calling creator functions that allow building up a tree of matchers, where +inner matchers are used to make the match more specific. + +For example, to create a matcher that matches all class or union declarations +in the AST of a translation unit, you can call `recordDecl() +<LibASTMatchersReference.html#recordDecl0Anchor>`_. To narrow the match down, +for example to find all class or union declarations with the name "``Foo``", +insert a `hasName <LibASTMatchersReference.html#hasName0Anchor>`_ matcher: the +call ``recordDecl(hasName("Foo"))`` returns a matcher that matches classes or +unions that are named "``Foo``", in any namespace. By default, matchers that +accept multiple inner matchers use an implicit `allOf() +<LibASTMatchersReference.html#allOf0Anchor>`_. This allows further narrowing +down the match, for example to match all classes that are derived from +"``Bar``": ``recordDecl(hasName("Foo"), isDerivedFrom("Bar"))``. + +How to create a matcher +----------------------- + +With more than a thousand classes in the Clang AST, one can quickly get lost +when trying to figure out how to create a matcher for a specific pattern. This +section will teach you how to use a rigorous step-by-step pattern to build the +matcher you are interested in. Note that there will always be matchers missing +for some part of the AST. See the section about :ref:`how to write your own +AST matchers <astmatchers-writing>` later in this document. + +.. FIXME: why is it linking back to the same section?! + +The precondition to using the matchers is to understand how the AST for what you +want to match looks like. 
The +:doc:`Introduction to the Clang AST <IntroductionToTheClangAST>` teaches you +how to dump a translation unit's AST into a human readable format. + +.. FIXME: Introduce link to ASTMatchersTutorial.html +.. FIXME: Introduce link to ASTMatchersCookbook.html + +In general, the strategy to create the right matchers is: + +#. Find the outermost class in Clang's AST you want to match. +#. Look at the `AST Matcher Reference <LibASTMatchersReference.html>`_ for + matchers that either match the node you're interested in or narrow down + attributes on the node. +#. Create your outer match expression. Verify that it works as expected. +#. Examine the matchers for what the next inner node you want to match is. +#. Repeat until the matcher is finished. + +.. _astmatchers-bind: + +Binding nodes in match expressions +---------------------------------- + +Matcher expressions allow you to specify which parts of the AST are interesting +for a certain task. Often you will want to then do something with the nodes +that were matched, like building source code transformations. + +To that end, matchers that match specific AST nodes (so called node matchers) +are bindable; for example, ``recordDecl(hasName("MyClass")).bind("id")`` will +bind the matched ``recordDecl`` node to the string "``id``", to be later +retrieved in the `match callback +<http://clang.llvm.org/doxygen/classclang_1_1ast__matchers_1_1MatchFinder_1_1MatchCallback.html>`_. + +.. FIXME: Introduce link to ASTMatchersTutorial.html +.. FIXME: Introduce link to ASTMatchersCookbook.html + +Writing your own matchers +------------------------- + +There are multiple different ways to define a matcher, depending on its type +and flexibility. + +``VariadicDynCastAllOfMatcher<Base, Derived>`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Those match all nodes of type *Base* if they can be dynamically casted to +*Derived*. The names of those matchers are nouns, which closely resemble +*Derived*. 
``VariadicDynCastAllOfMatchers`` are the backbone of the matcher +hierarchy. Most often, your match expression will start with one of them, and +you can :ref:`bind <astmatchers-bind>` the node they represent to ids for later +processing. + +``VariadicDynCastAllOfMatchers`` are callable classes that model variadic +template functions in C++03. They take an arbitrary number of +``Matcher<Derived>`` and return a ``Matcher<Base>``. + +``AST_MATCHER_P(Type, Name, ParamType, Param)`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Most matcher definitions use the matcher creation macros. Those define both +the matcher of type ``Matcher<Type>`` itself, and a matcher-creation function +named *Name* that takes a parameter of type *ParamType* and returns the +corresponding matcher. + +There are multiple matcher definition macros that deal with polymorphic return +values and different parameter counts. See `ASTMatchersMacros.h +<http://clang.llvm.org/doxygen/ASTMatchersMacros_8h.html>`_. + +.. _astmatchers-writing: + +Matcher creation functions +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Matchers are generated by nesting calls to matcher creation functions. Most of +the time those functions are either created by using +``VariadicDynCastAllOfMatcher`` or the matcher creation macros (see below). +The free-standing functions are an indication that this matcher is just a +combination of other matchers, as is for example the case with `callee +<LibASTMatchersReference.html#callee1Anchor>`_. + +.. FIXME: "... 
macros (see below)" --- there isn't anything below + diff --git a/docs/LibASTMatchersReference.html b/docs/LibASTMatchersReference.html index ea038e3..b476065 100644 --- a/docs/LibASTMatchersReference.html +++ b/docs/LibASTMatchersReference.html @@ -77,6 +77,19 @@ match callback.</p> <tr style="text-align:left"><th>Return type</th><th>Name</th><th>Parameters</th></tr> <!-- START_DECL_MATCHERS --> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>></td><td class="name" onclick="toggle('accessSpecDecl0')"><a name="accessSpecDecl0Anchor">accessSpecDecl</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1AccessSpecDecl.html">AccessSpecDecl</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="accessSpecDecl0"><pre>Matches C++ access specifier declarations. + +Given + class C { + public: + int a; + }; +accessSpecDecl() + matches 'public:' +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>></td><td class="name" onclick="toggle('classTemplateDecl0')"><a name="classTemplateDecl0Anchor">classTemplateDecl</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ClassTemplateDecl.html">ClassTemplateDecl</a>>...</td></tr> <tr><td colspan="4" class="doc" id="classTemplateDecl0"><pre>Matches C++ class template declarations. @@ -229,180 +242,143 @@ Example matches a </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>></td><td class="name" onclick="toggle('boolLiteral0')"><a name="boolLiteral0Anchor">boolLiteral</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXBoolLiteralExpr.html">CXXBoolLiteralExpr</a>>...</td></tr> -<tr><td colspan="4" class="doc" id="boolLiteral0"><pre>Matches bool literals. 
- -Example matches true - true +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1NestedNameSpecifierLoc.html">NestedNameSpecifierLoc</a>></td><td class="name" onclick="toggle('nestedNameSpecifierLoc0')"><a name="nestedNameSpecifierLoc0Anchor">nestedNameSpecifierLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1NestedNameSpecifierLoc.html">NestedNameSpecifierLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="nestedNameSpecifierLoc0"><pre>Same as nestedNameSpecifier but matches NestedNameSpecifierLoc. </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>></td><td class="name" onclick="toggle('castExpr0')"><a name="castExpr0Anchor">castExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CastExpr.html">CastExpr</a>>...</td></tr> -<tr><td colspan="4" class="doc" id="castExpr0"><pre>Matches any cast nodes of Clang's AST. +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1NestedNameSpecifier.html">NestedNameSpecifier</a>></td><td class="name" onclick="toggle('nestedNameSpecifier0')"><a name="nestedNameSpecifier0Anchor">nestedNameSpecifier</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1NestedNameSpecifier.html">NestedNameSpecifier</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="nestedNameSpecifier0"><pre>Matches nested name specifiers. 
-Example: castExpr() matches each of the following: - (int) 3; - const_cast<Expr *>(SubExpr); - char c = 0; -but does not match - int i = (0); - int k = 0; +Given + namespace ns { + struct A { static void f(); }; + void A::f() {} + void g() { A::f(); } + } + ns::A a; +nestedNameSpecifier() + matches "ns::" and both "A::" </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>></td><td class="name" onclick="toggle('characterLiteral0')"><a name="characterLiteral0Anchor">characterLiteral</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CharacterLiteral.html">CharacterLiteral</a>>...</td></tr> -<tr><td colspan="4" class="doc" id="characterLiteral0"><pre>Matches character literals (also matches wchar_t). - -Not matching Hex-encoded chars (e.g. 0x1234, which is a IntegerLiteral), -though. - -Example matches 'a', L'a' - char ch = 'a'; wchar_t chw = L'a'; +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>></td><td class="name" onclick="toggle('qualType0')"><a name="qualType0Anchor">qualType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="qualType0"><pre>Matches QualTypes in the clang AST. </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>></td><td class="name" onclick="toggle('constCastExpr0')"><a name="constCastExpr0Anchor">constCastExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstCastExpr.html">CXXConstCastExpr</a>>...</td></tr> -<tr><td colspan="4" class="doc" id="constCastExpr0"><pre>Matches a const_cast expression. 
+<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('arraySubscriptExpr0')"><a name="arraySubscriptExpr0Anchor">arraySubscriptExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ArraySubscriptExpr.html">ArraySubscriptExpr</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="arraySubscriptExpr0"><pre>Matches array subscript expressions. -Example: Matches const_cast<int*>(&r) in - int n = 42; - const int &r(n); - int* p = const_cast<int*>(&r); +Given + int i = a[1]; +arraySubscriptExpr() + matches "a[1]" </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>></td><td class="name" onclick="toggle('dynamicCastExpr0')"><a name="dynamicCastExpr0Anchor">dynamicCastExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXDynamicCastExpr.html">CXXDynamicCastExpr</a>>...</td></tr> -<tr><td colspan="4" class="doc" id="dynamicCastExpr0"><pre>Matches a dynamic_cast expression. +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('asmStmt0')"><a name="asmStmt0Anchor">asmStmt</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1AsmStmt.html">AsmStmt</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="asmStmt0"><pre>Matches asm statements. 
-Example: - dynamicCastExpr() -matches - dynamic_cast<D*>(&b); -in - struct B { virtual ~B() {} }; struct D : B {}; - B b; - D* p = dynamic_cast<D*>(&b); + int i = 100; + __asm("mov al, 2"); +asmStmt() + matches '__asm("mov al, 2")' </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>></td><td class="name" onclick="toggle('explicitCastExpr0')"><a name="explicitCastExpr0Anchor">explicitCastExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ExplicitCastExpr.html">ExplicitCastExpr</a>>...</td></tr> -<tr><td colspan="4" class="doc" id="explicitCastExpr0"><pre>Matches explicit cast expressions. - -Matches any cast expression written in user code, whether it be a -C-style cast, a functional-style cast, or a keyword cast. - -Does not match implicit conversions. - -Note: the name "explicitCast" is chosen to match Clang's terminology, as -Clang uses the term "cast" to apply to implicit conversions as well as to -actual cast expressions. - -hasDestinationType. +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('binaryOperator0')"><a name="binaryOperator0Anchor">binaryOperator</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1BinaryOperator.html">BinaryOperator</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="binaryOperator0"><pre>Matches binary operator expressions. 
-Example: matches all five of the casts in - int((int)(reinterpret_cast<int>(static_cast<int>(const_cast<int>(42))))) -but does not match the implicit conversion in - long ell = 42; +Example matches a || b + !(a || b) </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>></td><td class="name" onclick="toggle('functionalCastExpr0')"><a name="functionalCastExpr0Anchor">functionalCastExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXFunctionalCastExpr.html">CXXFunctionalCastExpr</a>>...</td></tr> -<tr><td colspan="4" class="doc" id="functionalCastExpr0"><pre>Matches functional cast expressions +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('bindTemporaryExpr0')"><a name="bindTemporaryExpr0Anchor">bindTemporaryExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXBindTemporaryExpr.html">CXXBindTemporaryExpr</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="bindTemporaryExpr0"><pre>Matches nodes where temporaries are created. -Example: Matches Foo(bar); - Foo f = bar; - Foo g = (Foo) bar; - Foo h = Foo(bar); +Example matches FunctionTakesString(GetStringByValue()) + (matcher = bindTemporaryExpr()) + FunctionTakesString(GetStringByValue()); + FunctionTakesStringByPointer(GetStringPointer()); </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>></td><td class="name" onclick="toggle('implicitCastExpr0')"><a name="implicitCastExpr0Anchor">implicitCastExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ImplicitCastExpr.html">ImplicitCastExpr</a>>...</td></tr> -<tr><td colspan="4" class="doc" id="implicitCastExpr0"><pre>Matches the implicit cast nodes of Clang's AST. 
+<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('boolLiteral0')"><a name="boolLiteral0Anchor">boolLiteral</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXBoolLiteralExpr.html">CXXBoolLiteralExpr</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="boolLiteral0"><pre>Matches bool literals. -This matches many different places, including function call return value -eliding, as well as any type conversions. +Example matches true + true </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>></td><td class="name" onclick="toggle('integerLiteral0')"><a name="integerLiteral0Anchor">integerLiteral</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1IntegerLiteral.html">IntegerLiteral</a>>...</td></tr> -<tr><td colspan="4" class="doc" id="integerLiteral0"><pre>Matches integer literals of all sizes encodings. - -Not matching character-encoded integers such as L'a'. +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('breakStmt0')"><a name="breakStmt0Anchor">breakStmt</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1BreakStmt.html">BreakStmt</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="breakStmt0"><pre>Matches break statements. -Example matches 1, 1L, 0x1, 1U +Given + while (true) { break; } +breakStmt() + matches 'break' </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>></td><td class="name" onclick="toggle('reinterpretCastExpr0')"><a name="reinterpretCastExpr0Anchor">reinterpretCastExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXReinterpretCastExpr.html">CXXReinterpretCastExpr</a>>...</td></tr> -<tr><td colspan="4" class="doc" id="reinterpretCastExpr0"><pre>Matches a reinterpret_cast expression. 
- -Either the source expression or the destination type can be matched -using has(), but hasDestinationType() is more specific and can be -more readable. +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('cStyleCastExpr0')"><a name="cStyleCastExpr0Anchor">cStyleCastExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CStyleCastExpr.html">CStyleCastExpr</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="cStyleCastExpr0"><pre>Matches a C-style cast expression. -Example matches reinterpret_cast<char*>(&p) in - void* p = reinterpret_cast<char*>(&p); +Example: Matches (int) 2.2f in + int i = (int) 2.2f; </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>></td><td class="name" onclick="toggle('staticCastExpr0')"><a name="staticCastExpr0Anchor">staticCastExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXStaticCastExpr.html">CXXStaticCastExpr</a>>...</td></tr> -<tr><td colspan="4" class="doc" id="staticCastExpr0"><pre>Matches a C++ static_cast expression. - -hasDestinationType -reinterpretCast +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('callExpr0')"><a name="callExpr0Anchor">callExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="callExpr0"><pre>Matches call expressions. 
-Example: - staticCastExpr() -matches - static_cast<long>(8) -in - long eight(static_cast<long>(8)); +Example matches x.y() and y() + X x; + x.y(); + y(); </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>></td><td class="name" onclick="toggle('stringLiteral0')"><a name="stringLiteral0Anchor">stringLiteral</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1StringLiteral.html">StringLiteral</a>>...</td></tr> -<tr><td colspan="4" class="doc" id="stringLiteral0"><pre>Matches string literals (also matches wide string literals). +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('castExpr0')"><a name="castExpr0Anchor">castExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CastExpr.html">CastExpr</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="castExpr0"><pre>Matches any cast nodes of Clang's AST. -Example matches "abcd", L"abcd" - char *s = "abcd"; wchar_t *ws = L"abcd" +Example: castExpr() matches each of the following: + (int) 3; + const_cast<Expr *>(SubExpr); + char c = 0; +but does not match + int i = (0); + int k = 0; </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('arraySubscriptExpr0')"><a name="arraySubscriptExpr0Anchor">arraySubscriptExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ArraySubscriptExpr.html">ArraySubscriptExpr</a>>...</td></tr> -<tr><td colspan="4" class="doc" id="arraySubscriptExpr0"><pre>Matches array subscript expressions. 
+<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('catchStmt0')"><a name="catchStmt0Anchor">catchStmt</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXCatchStmt.html">CXXCatchStmt</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="catchStmt0"><pre>Matches catch statements. -Given - int i = a[1]; -arraySubscriptExpr() - matches "a[1]" + try {} catch(int i) {} +catchStmt() + matches 'catch(int i)' </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('binaryOperator0')"><a name="binaryOperator0Anchor">binaryOperator</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1BinaryOperator.html">BinaryOperator</a>>...</td></tr> -<tr><td colspan="4" class="doc" id="binaryOperator0"><pre>Matches binary operator expressions. - -Example matches a || b - !(a || b) -</pre></td></tr> - +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('characterLiteral0')"><a name="characterLiteral0Anchor">characterLiteral</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CharacterLiteral.html">CharacterLiteral</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="characterLiteral0"><pre>Matches character literals (also matches wchar_t). -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('bindTemporaryExpr0')"><a name="bindTemporaryExpr0Anchor">bindTemporaryExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXBindTemporaryExpr.html">CXXBindTemporaryExpr</a>>...</td></tr> -<tr><td colspan="4" class="doc" id="bindTemporaryExpr0"><pre>Matches nodes where temporaries are created. +Not matching Hex-encoded chars (e.g. 0x1234, which is an IntegerLiteral), +though. 
-Example matches FunctionTakesString(GetStringByValue()) - (matcher = bindTemporaryExpr()) - FunctionTakesString(GetStringByValue()); - FunctionTakesStringByPointer(GetStringPointer()); +Example matches 'a', L'a' + char ch = 'a'; wchar_t chw = L'a'; </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('callExpr0')"><a name="callExpr0Anchor">callExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>>...</td></tr> -<tr><td colspan="4" class="doc" id="callExpr0"><pre>Matches call expressions. +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('compoundLiteralExpr0')"><a name="compoundLiteralExpr0Anchor">compoundLiteralExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CompoundLiteralExpr.html">CompoundLiteralExpr</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="compoundLiteralExpr0"><pre>Matches compound (i.e. non-scalar) literals -Example matches x.y() and y() - X x; - x.y(); - y(); +Example match: {1}, (1, 2) + int array[4] = {1}; vector int myvec = (vector int)(1, 2); </pre></td></tr> @@ -422,6 +398,16 @@ Example matches a ? b : c </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('constCastExpr0')"><a name="constCastExpr0Anchor">constCastExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstCastExpr.html">CXXConstCastExpr</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="constCastExpr0"><pre>Matches a const_cast expression. 
+ +Example: Matches const_cast<int*>(&r) in + int n = 42; + const int &r(n); + int* p = const_cast<int*>(&r); +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('constructExpr0')"><a name="constructExpr0Anchor">constructExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>>...</td></tr> <tr><td colspan="4" class="doc" id="constructExpr0"><pre>Matches constructor call expressions (including implicit ones). @@ -434,6 +420,16 @@ Example matches string(ptr, n) and ptr within arguments of f </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('continueStmt0')"><a name="continueStmt0Anchor">continueStmt</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ContinueStmt.html">ContinueStmt</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="continueStmt0"><pre>Matches continue statements. + +Given + while (true) { continue; } +continueStmt() + matches 'continue' +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('declRefExpr0')"><a name="declRefExpr0Anchor">declRefExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclRefExpr.html">DeclRefExpr</a>>...</td></tr> <tr><td colspan="4" class="doc" id="declRefExpr0"><pre>Matches expressions that refer to declarations. 
@@ -484,6 +480,41 @@ doStmt() </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('dynamicCastExpr0')"><a name="dynamicCastExpr0Anchor">dynamicCastExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXDynamicCastExpr.html">CXXDynamicCastExpr</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="dynamicCastExpr0"><pre>Matches a dynamic_cast expression. + +Example: + dynamicCastExpr() +matches + dynamic_cast<D*>(&b); +in + struct B { virtual ~B() {} }; struct D : B {}; + B b; + D* p = dynamic_cast<D*>(&b); +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('explicitCastExpr0')"><a name="explicitCastExpr0Anchor">explicitCastExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ExplicitCastExpr.html">ExplicitCastExpr</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="explicitCastExpr0"><pre>Matches explicit cast expressions. + +Matches any cast expression written in user code, whether it be a +C-style cast, a functional-style cast, or a keyword cast. + +Does not match implicit conversions. + +Note: the name "explicitCast" is chosen to match Clang's terminology, as +Clang uses the term "cast" to apply to implicit conversions as well as to +actual cast expressions. + +hasDestinationType. + +Example: matches all five of the casts in + int((int)(reinterpret_cast<int>(static_cast<int>(const_cast<int>(42))))) +but does not match the implicit conversion in + long ell = 42; +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('expr0')"><a name="expr0Anchor">expr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>>...</td></tr> <tr><td colspan="4" class="doc" id="expr0"><pre>Matches expressions. 
@@ -492,11 +523,42 @@ Example matches x() </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('forRangeStmt0')"><a name="forRangeStmt0Anchor">forRangeStmt</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXForRangeStmt.html">CXXForRangeStmt</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="forRangeStmt0"><pre>Matches range-based for statements. + +forRangeStmt() matches 'for (auto a : i)' + int i[] = {1, 2, 3}; for (auto a : i); + for(int j = 0; j < 5; ++j); +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('forStmt0')"><a name="forStmt0Anchor">forStmt</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ForStmt.html">ForStmt</a>>...</td></tr> <tr><td colspan="4" class="doc" id="forStmt0"><pre>Matches for statements. Example matches 'for (;;) {}' for (;;) {} + int i[] = {1, 2, 3}; for (auto a : i); +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('functionalCastExpr0')"><a name="functionalCastExpr0Anchor">functionalCastExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXFunctionalCastExpr.html">CXXFunctionalCastExpr</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="functionalCastExpr0"><pre>Matches functional cast expressions + +Example: Matches Foo(bar); + Foo f = bar; + Foo g = (Foo) bar; + Foo h = Foo(bar); +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('gotoStmt0')"><a name="gotoStmt0Anchor">gotoStmt</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1GotoStmt.html">GotoStmt</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="gotoStmt0"><pre>Matches goto statements. 
+ +Given + goto FOO; + FOO: bar(); +gotoStmt() + matches 'goto FOO' </pre></td></tr> @@ -508,6 +570,14 @@ Example matches 'if (x) {}' </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('implicitCastExpr0')"><a name="implicitCastExpr0Anchor">implicitCastExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ImplicitCastExpr.html">ImplicitCastExpr</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="implicitCastExpr0"><pre>Matches the implicit cast nodes of Clang's AST. + +This matches many different places, including function call return value +eliding, as well as any type conversions. +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('initListExpr0')"><a name="initListExpr0Anchor">initListExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1InitListExpr.html">InitListExpr</a>>...</td></tr> <tr><td colspan="4" class="doc" id="initListExpr0"><pre>Matches init list expressions. @@ -520,6 +590,34 @@ initList() </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('integerLiteral0')"><a name="integerLiteral0Anchor">integerLiteral</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1IntegerLiteral.html">IntegerLiteral</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="integerLiteral0"><pre>Matches integer literals of all sizes and encodings. + +Does not match character-encoded integers such as L'a'. 
+ +Example matches 1, 1L, 0x1, 1U +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('labelStmt0')"><a name="labelStmt0Anchor">labelStmt</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1LabelStmt.html">LabelStmt</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="labelStmt0"><pre>Matches label statements. + +Given + goto FOO; + FOO: bar(); +labelStmt() + matches 'FOO:' +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('lambdaExpr0')"><a name="lambdaExpr0Anchor">lambdaExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1LambdaExpr.html">LambdaExpr</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="lambdaExpr0"><pre>Matches lambda expressions. + +Example matches [&](){return 5;} + [&](){return 5;} +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('materializeTemporaryExpr0')"><a name="materializeTemporaryExpr0Anchor">materializeTemporaryExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MaterializeTemporaryExpr.html">MaterializeTemporaryExpr</a>>...</td></tr> <tr><td colspan="4" class="doc" id="materializeTemporaryExpr0"><pre>Matches nodes where temporaries are materialized. @@ -568,6 +666,20 @@ newExpr() </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('nullPtrLiteralExpr0')"><a name="nullPtrLiteralExpr0Anchor">nullPtrLiteralExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXNullPtrLiteralExpr.html">CXXNullPtrLiteralExpr</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="nullPtrLiteralExpr0"><pre>Matches nullptr literal. 
+</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('nullStmt0')"><a name="nullStmt0Anchor">nullStmt</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1NullStmt.html">NullStmt</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="nullStmt0"><pre>Matches null statements. + + foo();; +nullStmt() + matches the second ';' +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('operatorCallExpr0')"><a name="operatorCallExpr0Anchor">operatorCallExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXOperatorCallExpr.html">CXXOperatorCallExpr</a>>...</td></tr> <tr><td colspan="4" class="doc" id="operatorCallExpr0"><pre>Matches overloaded operator calls. @@ -584,6 +696,43 @@ Example matches both operator<<((o << b), c) and operator<<(o, </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('reinterpretCastExpr0')"><a name="reinterpretCastExpr0Anchor">reinterpretCastExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXReinterpretCastExpr.html">CXXReinterpretCastExpr</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="reinterpretCastExpr0"><pre>Matches a reinterpret_cast expression. + +Either the source expression or the destination type can be matched +using has(), but hasDestinationType() is more specific and can be +more readable. 
+ +Example matches reinterpret_cast<char*>(&p) in + void* p = reinterpret_cast<char*>(&p); +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('returnStmt0')"><a name="returnStmt0Anchor">returnStmt</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ReturnStmt.html">ReturnStmt</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="returnStmt0"><pre>Matches return statements. + +Given + return 1; +returnStmt() + matches 'return 1' +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('staticCastExpr0')"><a name="staticCastExpr0Anchor">staticCastExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXStaticCastExpr.html">CXXStaticCastExpr</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="staticCastExpr0"><pre>Matches a C++ static_cast expression. + +hasDestinationType +reinterpretCast + +Example: + staticCastExpr() +matches + static_cast<long>(8) +in + long eight(static_cast<long>(8)); +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('stmt0')"><a name="stmt0Anchor">stmt</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>>...</td></tr> <tr><td colspan="4" class="doc" id="stmt0"><pre>Matches statements. @@ -594,6 +743,14 @@ stmt() </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('stringLiteral0')"><a name="stringLiteral0Anchor">stringLiteral</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1StringLiteral.html">StringLiteral</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="stringLiteral0"><pre>Matches string literals (also matches wide string literals). 
+ +Example matches "abcd", L"abcd" + char *s = "abcd"; wchar_t *ws = L"abcd" +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('switchCase0')"><a name="switchCase0Anchor">switchCase</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1SwitchCase.html">SwitchCase</a>>...</td></tr> <tr><td colspan="4" class="doc" id="switchCase0"><pre>Matches case and default statements inside switch statements. @@ -604,6 +761,46 @@ switchCase() </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('switchStmt0')"><a name="switchStmt0Anchor">switchStmt</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1SwitchStmt.html">SwitchStmt</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="switchStmt0"><pre>Matches switch statements. + +Given + switch(a) { case 42: break; default: break; } +switchStmt() + matches 'switch(a)'. +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('thisExpr0')"><a name="thisExpr0Anchor">thisExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXThisExpr.html">CXXThisExpr</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="thisExpr0"><pre>Matches implicit and explicit this expressions. + +Example matches the implicit this expression in "return i". 
+ (matcher = thisExpr()) +struct foo { + int i; + int f() { return i; } +}; +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('throwExpr0')"><a name="throwExpr0Anchor">throwExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXThrowExpr.html">CXXThrowExpr</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="throwExpr0"><pre>Matches throw expressions. + + try { throw 5; } catch(int i) {} +throwExpr() + matches 'throw 5' +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('tryStmt0')"><a name="tryStmt0Anchor">tryStmt</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXTryStmt.html">CXXTryStmt</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="tryStmt0"><pre>Matches try statements. + + try {} catch(int i) {} +tryStmt() + matches 'try {}' +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('unaryExprOrTypeTraitExpr0')"><a name="unaryExprOrTypeTraitExpr0Anchor">unaryExprOrTypeTraitExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1UnaryExprOrTypeTraitExpr.html">UnaryExprOrTypeTraitExpr</a>>...</td></tr> <tr><td colspan="4" class="doc" id="unaryExprOrTypeTraitExpr0"><pre>Matches sizeof (C99), alignof (C++11) and vec_step (OpenCL) @@ -623,6 +820,13 @@ Example matches !a </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('userDefinedLiteral0')"><a name="userDefinedLiteral0Anchor">userDefinedLiteral</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1UserDefinedLiteral.html">UserDefinedLiteral</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="userDefinedLiteral0"><pre>Matches user 
defined literal operator call. + +Example match: "foo"_suffix +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('whileStmt0')"><a name="whileStmt0Anchor">whileStmt</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1WhileStmt.html">WhileStmt</a>>...</td></tr> <tr><td colspan="4" class="doc" id="whileStmt0"><pre>Matches while statements. @@ -632,6 +836,564 @@ whileStmt() matches 'while (true) {}'. </pre></td></tr> + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('arrayTypeLoc0')"><a name="arrayTypeLoc0Anchor">arrayTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ArrayTypeLoc.html">ArrayTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="arrayTypeLoc0"><pre>Matches all kinds of arrays. + +Given + int a[] = { 2, 3 }; + int b[4]; + void f() { int c[a[0]]; } +arrayType() + matches "int a[]", "int b[4]" and "int c[a[0]]"; +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('atomicTypeLoc0')"><a name="atomicTypeLoc0Anchor">atomicTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1AtomicTypeLoc.html">AtomicTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="atomicTypeLoc0"><pre>Matches atomic types. 
+ +Given + _Atomic(int) i; +atomicType() + matches "_Atomic(int) i" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('autoTypeLoc0')"><a name="autoTypeLoc0Anchor">autoTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1AutoTypeLoc.html">AutoTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="autoTypeLoc0"><pre>Matches type nodes representing C++11 auto types. + +Given: + auto n = 4; + int v[] = { 2, 3 }; + for (auto i : v) { } +autoType() + matches "auto n" and "auto i" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('blockPointerTypeLoc0')"><a name="blockPointerTypeLoc0Anchor">blockPointerTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1BlockPointerTypeLoc.html">BlockPointerTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="blockPointerTypeLoc0"><pre>Matches block pointer types, i.e. types syntactically represented as +"void (^)(int)". + +The pointee is always required to be a FunctionType. +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('builtinTypeLoc0')"><a name="builtinTypeLoc0Anchor">builtinTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1BuiltinTypeLoc.html">BuiltinTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="builtinTypeLoc0"><pre>Matches builtin types. 
+ +Given + struct A {}; + A a; + int b; + float c; + bool d; +builtinType() + matches "int b", "float c" and "bool d" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('complexTypeLoc0')"><a name="complexTypeLoc0Anchor">complexTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ComplexTypeLoc.html">ComplexTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="complexTypeLoc0"><pre>Matches C99 complex types. + +Given + _Complex float f; +complexType() + matches "_Complex float f" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('constantArrayTypeLoc0')"><a name="constantArrayTypeLoc0Anchor">constantArrayTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ConstantArrayTypeLoc.html">ConstantArrayTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="constantArrayTypeLoc0"><pre>Matches C arrays with a specified constant size. + +Given + void() { + int a[2]; + int b[] = { 2, 3 }; + int c[b[0]]; + } +constantArrayType() + matches "int a[2]" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('dependentSizedArrayTypeLoc0')"><a name="dependentSizedArrayTypeLoc0Anchor">dependentSizedArrayTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1DependentSizedArrayTypeLoc.html">DependentSizedArrayTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="dependentSizedArrayTypeLoc0"><pre>Matches C++ arrays whose size is a value-dependent expression. 
+ +Given + template<typename T, int Size> + class array { + T data[Size]; + }; +dependentSizedArrayType + matches "T data[Size]" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('elaboratedTypeLoc0')"><a name="elaboratedTypeLoc0Anchor">elaboratedTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ElaboratedTypeLoc.html">ElaboratedTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="elaboratedTypeLoc0"><pre>Matches types specified with an elaborated type keyword or with a +qualified name. + +Given + namespace N { + namespace M { + class D {}; + } + } + class C {}; + + class C c; + N::M::D d; + +elaboratedType() matches the type of the variable declarations of both +c and d. +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('functionTypeLoc0')"><a name="functionTypeLoc0Anchor">functionTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionTypeLoc.html">FunctionTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="functionTypeLoc0"><pre>Matches FunctionType nodes. + +Given + int (*f)(int); + void g(); +functionType() + matches "int (*f)(int)" and the type of "g". +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('incompleteArrayTypeLoc0')"><a name="incompleteArrayTypeLoc0Anchor">incompleteArrayTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1IncompleteArrayTypeLoc.html">IncompleteArrayTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="incompleteArrayTypeLoc0"><pre>Matches C arrays with unspecified size. 
+ +Given + int a[] = { 2, 3 }; + int b[42]; + void f(int c[]) { int d[a[0]]; }; +incompleteArrayType() + matches "int a[]" and "int c[]" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('lValueReferenceTypeLoc0')"><a name="lValueReferenceTypeLoc0Anchor">lValueReferenceTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1LValueReferenceTypeLoc.html">LValueReferenceTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="lValueReferenceTypeLoc0"><pre>Matches lvalue reference types. + +Given: + int *a; + int &b = *a; + int &&c = 1; + auto &d = b; + auto &&e = c; + auto &&f = 2; + int g = 5; + +lValueReferenceType() matches the types of b, d, and e. e is +matched since the type is deduced as int& by reference collapsing rules. +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('memberPointerTypeLoc0')"><a name="memberPointerTypeLoc0Anchor">memberPointerTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberPointerTypeLoc.html">MemberPointerTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="memberPointerTypeLoc0"><pre>Matches member pointer types. +Given + struct A { int i; } + A::* ptr = A::i; +memberPointerType() + matches "A::* ptr" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('parenTypeLoc0')"><a name="parenTypeLoc0Anchor">parenTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ParenTypeLoc.html">ParenTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="parenTypeLoc0"><pre>Matches ParenType nodes. 
+ +Given + int (*ptr_to_array)[4]; + int *array_of_ptrs[4]; + +varDecl(hasType(pointsTo(parenType()))) matches ptr_to_array but not +array_of_ptrs. +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('pointerTypeLoc0')"><a name="pointerTypeLoc0Anchor">pointerTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1PointerTypeLoc.html">PointerTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="pointerTypeLoc0"><pre>Matches pointer types. + +Given + int *a; + int &b = *a; + int c = 5; +pointerType() + matches "int *a" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('rValueReferenceTypeLoc0')"><a name="rValueReferenceTypeLoc0Anchor">rValueReferenceTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1RValueReferenceTypeLoc.html">RValueReferenceTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="rValueReferenceTypeLoc0"><pre>Matches rvalue reference types. + +Given: + int *a; + int &b = *a; + int &&c = 1; + auto &d = b; + auto &&e = c; + auto &&f = 2; + int g = 5; + +rValueReferenceType() matches the types of c and f. e is not +matched as it is deduced to int& by reference collapsing rules. +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('recordTypeLoc0')"><a name="recordTypeLoc0Anchor">recordTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1RecordTypeLoc.html">RecordTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="recordTypeLoc0"><pre>Matches record types (e.g. structs, classes). + +Given + class C {}; + struct S {}; + + C c; + S s; + +recordType() matches the type of the variable declarations of both c +and s. 
+</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('referenceTypeLoc0')"><a name="referenceTypeLoc0Anchor">referenceTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ReferenceTypeLoc.html">ReferenceTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="referenceTypeLoc0"><pre>Matches both lvalue and rvalue reference types. + +Given + int *a; + int &b = *a; + int &&c = 1; + auto &d = b; + auto &&e = c; + auto &&f = 2; + int g = 5; + +referenceType() matches the types of b, c, d, e, and f. +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('templateSpecializationTypeLoc0')"><a name="templateSpecializationTypeLoc0Anchor">templateSpecializationTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TemplateSpecializationTypeLoc.html">TemplateSpecializationTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="templateSpecializationTypeLoc0"><pre>Matches template specialization types. + +Given + template <typename T> + class C { }; + + template class C<int>; A + C<char> var; B + +templateSpecializationType() matches the type of the explicit +instantiation in A and the type of the variable declaration in B. +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('typeLoc0')"><a name="typeLoc0Anchor">typeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="typeLoc0"><pre>Matches TypeLocs in the clang AST. 
+</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('typedefTypeLoc0')"><a name="typedefTypeLoc0Anchor">typedefTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypedefTypeLoc.html">TypedefTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="typedefTypeLoc0"><pre>Matches typedef types. + +Given + typedef int X; +typedefType() + matches "typedef int X" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('variableArrayTypeLoc0')"><a name="variableArrayTypeLoc0Anchor">variableArrayTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1VariableArrayTypeLoc.html">VariableArrayTypeLoc</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="variableArrayTypeLoc0"><pre>Matches C arrays with a specified size that is not an +integer-constant-expression. + +Given + void f() { + int a[] = { 2, 3 } + int b[42]; + int c[a[0]]; +variableArrayType() + matches "int c[a[0]]" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('arrayType0')"><a name="arrayType0Anchor">arrayType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ArrayType.html">ArrayType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="arrayType0"><pre>Matches all kinds of arrays. 
+ +Given + int a[] = { 2, 3 }; + int b[4]; + void f() { int c[a[0]]; } +arrayType() + matches "int a[]", "int b[4]" and "int c[a[0]]"; +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('atomicType0')"><a name="atomicType0Anchor">atomicType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1AtomicType.html">AtomicType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="atomicType0"><pre>Matches atomic types. + +Given + _Atomic(int) i; +atomicType() + matches "_Atomic(int) i" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('autoType0')"><a name="autoType0Anchor">autoType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1AutoType.html">AutoType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="autoType0"><pre>Matches type nodes representing C++11 auto types. + +Given: + auto n = 4; + int v[] = { 2, 3 }; + for (auto i : v) { } +autoType() + matches "auto n" and "auto i" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('blockPointerType0')"><a name="blockPointerType0Anchor">blockPointerType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1BlockPointerType.html">BlockPointerType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="blockPointerType0"><pre>Matches block pointer types, i.e. types syntactically represented as +"void (^)(int)". + +The pointee is always required to be a FunctionType. 
+</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('builtinType0')"><a name="builtinType0Anchor">builtinType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1BuiltinType.html">BuiltinType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="builtinType0"><pre>Matches builtin Types. + +Given + struct A {}; + A a; + int b; + float c; + bool d; +builtinType() + matches "int b", "float c" and "bool d" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('complexType0')"><a name="complexType0Anchor">complexType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ComplexType.html">ComplexType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="complexType0"><pre>Matches C99 complex types. + +Given + _Complex float f; +complexType() + matches "_Complex float f" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('constantArrayType0')"><a name="constantArrayType0Anchor">constantArrayType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ConstantArrayType.html">ConstantArrayType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="constantArrayType0"><pre>Matches C arrays with a specified constant size. 
+ +Given + void f() { + int a[2]; + int b[] = { 2, 3 }; + int c[b[0]]; + } +constantArrayType() + matches "int a[2]" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('dependentSizedArrayType0')"><a name="dependentSizedArrayType0Anchor">dependentSizedArrayType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1DependentSizedArrayType.html">DependentSizedArrayType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="dependentSizedArrayType0"><pre>Matches C++ arrays whose size is a value-dependent expression. + +Given + template<typename T, int Size> + class array { + T data[Size]; + }; +dependentSizedArrayType() + matches "T data[Size]" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('elaboratedType0')"><a name="elaboratedType0Anchor">elaboratedType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ElaboratedType.html">ElaboratedType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="elaboratedType0"><pre>Matches types specified with an elaborated type keyword or with a +qualified name. + +Given + namespace N { + namespace M { + class D {}; + } + } + class C {}; + + class C c; + N::M::D d; + +elaboratedType() matches the type of the variable declarations of both +c and d. +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('functionType0')"><a name="functionType0Anchor">functionType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionType.html">FunctionType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="functionType0"><pre>Matches FunctionType nodes. + +Given + int (*f)(int); + void g(); +functionType() + matches "int (*f)(int)" and the type of "g". 
+</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('incompleteArrayType0')"><a name="incompleteArrayType0Anchor">incompleteArrayType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1IncompleteArrayType.html">IncompleteArrayType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="incompleteArrayType0"><pre>Matches C arrays with unspecified size. + +Given + int a[] = { 2, 3 }; + int b[42]; + void f(int c[]) { int d[a[0]]; }; +incompleteArrayType() + matches "int a[]" and "int c[]" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('lValueReferenceType0')"><a name="lValueReferenceType0Anchor">lValueReferenceType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1LValueReferenceType.html">LValueReferenceType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="lValueReferenceType0"><pre>Matches lvalue reference types. + +Given: + int *a; + int &b = *a; + int &&c = 1; + auto &d = b; + auto &&e = c; + auto &&f = 2; + int g = 5; + +lValueReferenceType() matches the types of b, d, and e. e is +matched since the type is deduced as int& by reference collapsing rules. +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('memberPointerType0')"><a name="memberPointerType0Anchor">memberPointerType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberPointerType.html">MemberPointerType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="memberPointerType0"><pre>Matches member pointer types. 
+Given + struct A { int i; } + A::* ptr = A::i; +memberPointerType() + matches "A::* ptr" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('parenType0')"><a name="parenType0Anchor">parenType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ParenType.html">ParenType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="parenType0"><pre>Matches ParenType nodes. + +Given + int (*ptr_to_array)[4]; + int *array_of_ptrs[4]; + +varDecl(hasType(pointsTo(parenType()))) matches ptr_to_array but not +array_of_ptrs. +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('pointerType0')"><a name="pointerType0Anchor">pointerType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1PointerType.html">PointerType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="pointerType0"><pre>Matches pointer types. + +Given + int *a; + int &b = *a; + int c = 5; +pointerType() + matches "int *a" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('rValueReferenceType0')"><a name="rValueReferenceType0Anchor">rValueReferenceType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1RValueReferenceType.html">RValueReferenceType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="rValueReferenceType0"><pre>Matches rvalue reference types. + +Given: + int *a; + int &b = *a; + int &&c = 1; + auto &d = b; + auto &&e = c; + auto &&f = 2; + int g = 5; + +rValueReferenceType() matches the types of c and f. e is not +matched as it is deduced to int& by reference collapsing rules. 
+</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('recordType0')"><a name="recordType0Anchor">recordType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1RecordType.html">RecordType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="recordType0"><pre>Matches record types (e.g. structs, classes). + +Given + class C {}; + struct S {}; + + C c; + S s; + +recordType() matches the type of the variable declarations of both c +and s. +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('referenceType0')"><a name="referenceType0Anchor">referenceType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ReferenceType.html">ReferenceType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="referenceType0"><pre>Matches both lvalue and rvalue reference types. + +Given + int *a; + int &b = *a; + int &&c = 1; + auto &d = b; + auto &&e = c; + auto &&f = 2; + int g = 5; + +referenceType() matches the types of b, c, d, e, and f. +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('templateSpecializationType0')"><a name="templateSpecializationType0Anchor">templateSpecializationType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TemplateSpecializationType.html">TemplateSpecializationType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="templateSpecializationType0"><pre>Matches template specialization types. + +Given + template <typename T> + class C { }; + + template class C<int>; A + C<char> var; B + +templateSpecializationType() matches the type of the explicit +instantiation in A and the type of the variable declaration in B. 
+</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('type0')"><a name="type0Anchor">type</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="type0"><pre>Matches Types in the clang AST. +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('typedefType0')"><a name="typedefType0Anchor">typedefType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypedefType.html">TypedefType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="typedefType0"><pre>Matches typedef types. + +Given + typedef int X; +typedefType() + matches "typedef int X" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td><td class="name" onclick="toggle('variableArrayType0')"><a name="variableArrayType0Anchor">variableArrayType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1VariableArrayType.html">VariableArrayType</a>>...</td></tr> +<tr><td colspan="4" class="doc" id="variableArrayType0"><pre>Matches C arrays with a specified size that is not an +integer-constant-expression. 
+ +Given + void f() { + int a[] = { 2, 3 }; + int b[42]; + int c[a[0]]; + } +variableArrayType() + matches "int c[a[0]]" +</pre></td></tr> + <!--END_DECL_MATCHERS --> </table> @@ -731,22 +1493,43 @@ constructorDecl(hasAnyConstructorInitializer(isWritten())) </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXOperatorCallExpr.html">CXXOperatorCallExpr</a>></td><td class="name" onclick="toggle('hasOverloadedOperatorName0')"><a name="hasOverloadedOperatorName0Anchor">hasOverloadedOperatorName</a></td><td>std::string Name</td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXMethodDecl.html">CXXMethodDecl</a>></td><td class="name" onclick="toggle('hasOverloadedOperatorName0')"><a name="hasOverloadedOperatorName0Anchor">hasOverloadedOperatorName</a></td><td>StringRef Name</td></tr> <tr><td colspan="4" class="doc" id="hasOverloadedOperatorName0"><pre>Matches overloaded operator names. Matches overloaded operator names specified in strings without the -"operator" prefix, such as "<<", for OverloadedOperatorCall's. +"operator" prefix: e.g. "<<". + +Given: + class A { int operator*(); }; + const A &operator<<(const A &a, const A &b); + A a; + a << a; <-- This matches -Example matches a << b - (matcher == operatorCallExpr(hasOverloadedOperatorName("<<"))) - a << b; - c && d; assuming both operator<< - and operator&& are overloaded somewhere. +operatorCallExpr(hasOverloadedOperatorName("<<")) matches the specified +line and recordDecl(hasMethod(hasOverloadedOperatorName("*"))) matches +the declaration of A.
+ +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXOperatorCallExpr.html">CXXOperatorCallExpr</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXMethodDecl.html">CXXMethodDecl</a>> </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXRecordDecl.html">CXXRecordDecl</a>></td><td class="name" onclick="toggle('isA1')"><a name="isA1Anchor">isA</a></td><td>StringRef BaseName</td></tr> -<tr><td colspan="4" class="doc" id="isA1"><pre>Overloaded method as shortcut for isA(hasName(...)). +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXOperatorCallExpr.html">CXXOperatorCallExpr</a>></td><td class="name" onclick="toggle('hasOverloadedOperatorName1')"><a name="hasOverloadedOperatorName1Anchor">hasOverloadedOperatorName</a></td><td>StringRef Name</td></tr> +<tr><td colspan="4" class="doc" id="hasOverloadedOperatorName1"><pre>Matches overloaded operator names. + +Matches overloaded operator names specified in strings without the +"operator" prefix: e.g. "<<". + +Given: + class A { int operator*(); }; + const A &operator<<(const A &a, const A &b); + A a; + a << a; <-- This matches + +operatorCallExpr(hasOverloadedOperatorName("<<")) matches the specified +line and recordDecl(hasMethod(hasOverloadedOperatorName("*"))) matches +the declaration of A.
+ +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXOperatorCallExpr.html">CXXOperatorCallExpr</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXMethodDecl.html">CXXMethodDecl</a>> </pre></td></tr> @@ -755,8 +1538,8 @@ Example matches a << b </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXRecordDecl.html">CXXRecordDecl</a>></td><td class="name" onclick="toggle('isExplicitTemplateSpecialization0')"><a name="isExplicitTemplateSpecialization0Anchor">isExplicitTemplateSpecialization</a></td><td></td></tr> -<tr><td colspan="4" class="doc" id="isExplicitTemplateSpecialization0"><pre>Matches explicit template specializations of function, class, or +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXRecordDecl.html">CXXRecordDecl</a>></td><td class="name" onclick="toggle('isExplicitTemplateSpecialization2')"><a name="isExplicitTemplateSpecialization2Anchor">isExplicitTemplateSpecialization</a></td><td></td></tr> +<tr><td colspan="4" class="doc" id="isExplicitTemplateSpecialization2"><pre>Matches explicit template specializations of function, class, or static member variable template instantiations. 
Given @@ -769,8 +1552,14 @@ Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Functi </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXRecordDecl.html">CXXRecordDecl</a>></td><td class="name" onclick="toggle('isTemplateInstantiation0')"><a name="isTemplateInstantiation0Anchor">isTemplateInstantiation</a></td><td></td></tr> -<tr><td colspan="4" class="doc" id="isTemplateInstantiation0"><pre>Matches template instantiations of function, class, or static +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXRecordDecl.html">CXXRecordDecl</a>></td><td class="name" onclick="toggle('isSameOrDerivedFrom1')"><a name="isSameOrDerivedFrom1Anchor">isSameOrDerivedFrom</a></td><td>StringRef BaseName</td></tr> +<tr><td colspan="4" class="doc" id="isSameOrDerivedFrom1"><pre>Overloaded method as shortcut for +isSameOrDerivedFrom(hasName(...)). +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXRecordDecl.html">CXXRecordDecl</a>></td><td class="name" onclick="toggle('isTemplateInstantiation2')"><a name="isTemplateInstantiation2Anchor">isTemplateInstantiation</a></td><td></td></tr> +<tr><td colspan="4" class="doc" id="isTemplateInstantiation2"><pre>Matches template instantiations of function, class, or static member variable template instantiations. Given @@ -823,6 +1612,18 @@ compoundStmt(statementCountIs(0))) </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ConstantArrayType.html">ConstantArrayType</a>></td><td class="name" onclick="toggle('hasSize0')"><a name="hasSize0Anchor">hasSize</a></td><td>unsigned N</td></tr> +<tr><td colspan="4" class="doc" id="hasSize0"><pre>Matches ConstantArrayType nodes that have the specified size. 
+ +Given + int a[42]; + int b[2 * 21]; + int c[41], d[43]; +constantArrayType(hasSize(42)) + matches "int a[42]" and "int b[2 * 21]" +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclStmt.html">DeclStmt</a>></td><td class="name" onclick="toggle('declCountIs0')"><a name="declCountIs0Anchor">declCountIs</a></td><td>unsigned N</td></tr> <tr><td colspan="4" class="doc" id="declCountIs0"><pre>Matches declaration statements that contain a specific number of declarations. @@ -836,6 +1637,55 @@ declCountIs(2) </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>></td><td class="name" onclick="toggle('equalsNode0')"><a name="equalsNode0Anchor">equalsNode</a></td><td>Decl* Other</td></tr> +<tr><td colspan="4" class="doc" id="equalsNode0"><pre>Matches if a node equals another node. + +Decl has pointer identity in the AST. +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>></td><td class="name" onclick="toggle('isPrivate0')"><a name="isPrivate0Anchor">isPrivate</a></td><td></td></tr> +<tr><td colspan="4" class="doc" id="isPrivate0"><pre>Matches private C++ declarations. + +Given + class C { + public: int a; + protected: int b; + private: int c; + }; +fieldDecl(isPrivate()) + matches 'int c;' +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>></td><td class="name" onclick="toggle('isProtected0')"><a name="isProtected0Anchor">isProtected</a></td><td></td></tr> +<tr><td colspan="4" class="doc" id="isProtected0"><pre>Matches protected C++ declarations. 
+ +Given + class C { + public: int a; + protected: int b; + private: int c; + }; +fieldDecl(isProtected()) + matches 'int b;' +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>></td><td class="name" onclick="toggle('isPublic0')"><a name="isPublic0Anchor">isPublic</a></td><td></td></tr> +<tr><td colspan="4" class="doc" id="isPublic0"><pre>Matches public C++ declarations. + +Given + class C { + public: int a; + protected: int b; + private: int c; + }; +fieldDecl(isPublic()) + matches 'int a;' +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1FloatingLiteral.html">FloatingLiteral</a>></td><td class="name" onclick="toggle('equals1')"><a name="equals1Anchor">equals</a></td><td>ValueT Value</td></tr> <tr><td colspan="4" class="doc" id="equals1"><pre>Matches literals that are equal to the given value. @@ -847,8 +1697,8 @@ Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Charac </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>></td><td class="name" onclick="toggle('isDefinition0')"><a name="isDefinition0Anchor">isDefinition</a></td><td></td></tr> -<tr><td colspan="4" class="doc" id="isDefinition0"><pre>Matches if a declaration has a body attached. +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>></td><td class="name" onclick="toggle('isDefinition2')"><a name="isDefinition2Anchor">isDefinition</a></td><td></td></tr> +<tr><td colspan="4" class="doc" id="isDefinition2"><pre>Matches if a declaration has a body attached. 
Example matches A, va, fa class A {}; @@ -862,8 +1712,8 @@ Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TagDec </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>></td><td class="name" onclick="toggle('isExplicitTemplateSpecialization2')"><a name="isExplicitTemplateSpecialization2Anchor">isExplicitTemplateSpecialization</a></td><td></td></tr> -<tr><td colspan="4" class="doc" id="isExplicitTemplateSpecialization2"><pre>Matches explicit template specializations of function, class, or +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>></td><td class="name" onclick="toggle('isExplicitTemplateSpecialization0')"><a name="isExplicitTemplateSpecialization0Anchor">isExplicitTemplateSpecialization</a></td><td></td></tr> +<tr><td colspan="4" class="doc" id="isExplicitTemplateSpecialization0"><pre>Matches explicit template specializations of function, class, or static member variable template instantiations. Given @@ -888,8 +1738,8 @@ functionDecl(isExternC()) </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>></td><td class="name" onclick="toggle('isTemplateInstantiation2')"><a name="isTemplateInstantiation2Anchor">isTemplateInstantiation</a></td><td></td></tr> -<tr><td colspan="4" class="doc" id="isTemplateInstantiation2"><pre>Matches template instantiations of function, class, or static +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>></td><td class="name" onclick="toggle('isTemplateInstantiation0')"><a name="isTemplateInstantiation0Anchor">isTemplateInstantiation</a></td><td></td></tr> +<tr><td colspan="4" class="doc" id="isTemplateInstantiation0"><pre>Matches template instantiations of function, class, or static member variable template instantiations. 
Given @@ -909,6 +1759,17 @@ Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Functi </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>></td><td class="name" onclick="toggle('parameterCountIs0')"><a name="parameterCountIs0Anchor">parameterCountIs</a></td><td>unsigned N</td></tr> +<tr><td colspan="4" class="doc" id="parameterCountIs0"><pre>Matches FunctionDecls that have a specific parameter count. + +Given + void f(int i) {} + void g(int i, int j) {} +functionDecl(parameterCountIs(2)) + matches g(int i, int j) {} +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1IntegerLiteral.html">IntegerLiteral</a>></td><td class="name" onclick="toggle('equals0')"><a name="equals0Anchor">equals</a></td><td>ValueT Value</td></tr> <tr><td colspan="4" class="doc" id="equals0"><pre>Matches literals that are equal to the given value. @@ -953,8 +1814,8 @@ Example matches X (Name is one of "::a::b::X", "a::b::X", "b::X", "X") <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1NamedDecl.html">NamedDecl</a>></td><td class="name" onclick="toggle('matchesName0')"><a name="matchesName0Anchor">matchesName</a></td><td>std::string RegExp</td></tr> -<tr><td colspan="4" class="doc" id="matchesName0"><pre>Matches NamedDecl nodes whose full names partially match the -given RegExp. +<tr><td colspan="4" class="doc" id="matchesName0"><pre>Matches NamedDecl nodes whose fully qualified names contain +a substring matched by the given RegExp. Supports specifying enclosing namespaces or classes by prefixing the name with '<enclosing>::'. 
Does not match typedefs @@ -979,6 +1840,21 @@ callExpr(on(hasType(asString("class Y *")))) </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>></td><td class="name" onclick="toggle('hasLocalQualifiers0')"><a name="hasLocalQualifiers0Anchor">hasLocalQualifiers</a></td><td></td></tr> +<tr><td colspan="4" class="doc" id="hasLocalQualifiers0"><pre>Matches QualType nodes that have local CV-qualifiers attached to +the node, not hidden within a typedef. + +Given + typedef const int const_int; + const_int i; + int *const j; + int *volatile k; + int m; +varDecl(hasType(hasLocalQualifiers())) matches only j and k. +i is const-qualified but the qualifier is not local. +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>></td><td class="name" onclick="toggle('isConstQualified0')"><a name="isConstQualified0Anchor">isConstQualified</a></td><td></td></tr> <tr><td colspan="4" class="doc" id="isConstQualified0"><pre>Matches QualType nodes that are const-qualified, i.e., that include "top-level" const. @@ -1008,8 +1884,16 @@ matches "a(int)", "b(long)", but not "c(double)". </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TagDecl.html">TagDecl</a>></td><td class="name" onclick="toggle('isDefinition2')"><a name="isDefinition2Anchor">isDefinition</a></td><td></td></tr> -<tr><td colspan="4" class="doc" id="isDefinition2"><pre>Matches if a declaration has a body attached. +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('equalsNode1')"><a name="equalsNode1Anchor">equalsNode</a></td><td>Stmt* Other</td></tr> +<tr><td colspan="4" class="doc" id="equalsNode1"><pre>Matches if a node equals another node. + +Stmt has pointer identity in the AST. 
+ +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TagDecl.html">TagDecl</a>></td><td class="name" onclick="toggle('isDefinition0')"><a name="isDefinition0Anchor">isDefinition</a></td><td></td></tr> +<tr><td colspan="4" class="doc" id="isDefinition0"><pre>Matches if a declaration has a body attached. Example matches A, va, fa class A {}; @@ -1110,6 +1994,40 @@ match expressions.</p> <tr style="text-align:left"><th>Return type</th><th>Name</th><th>Parameters</th></tr> <!-- START_TRAVERSAL_MATCHERS --> +<tr><td>Matcher<*></td><td class="name" onclick="toggle('eachOf0')"><a name="eachOf0Anchor">eachOf</a></td><td>Matcher<*> P1, Matcher<*> P2</td></tr> +<tr><td colspan="4" class="doc" id="eachOf0"><pre>Matches if any of the given matchers matches. + +Unlike anyOf, eachOf will generate a match result for each +matching submatcher. + +For example, in: + class A { int a; int b; }; +The matcher: + recordDecl(eachOf(has(fieldDecl(hasName("a")).bind("v")), + has(fieldDecl(hasName("b")).bind("v")))) +will generate two results binding "v", the first of which binds +the field declaration of a, the second the field declaration of +b. + +Usable as: Any Matcher +</pre></td></tr> + + +<tr><td>Matcher<*></td><td class="name" onclick="toggle('findAll0')"><a name="findAll0Anchor">findAll</a></td><td>Matcher<T> Matcher</td></tr> +<tr><td colspan="4" class="doc" id="findAll0"><pre>Matches if the node or any descendant matches. + +Generates results for each match. + +For example, in: + class A { class B {}; class C {}; }; +The matcher: + recordDecl(hasName("::A"), findAll(recordDecl(isDefinition()).bind("m"))) +will generate results for A, B and C. 
+ +Usable as: Any Matcher +</pre></td></tr> + + <tr><td>Matcher<*></td><td class="name" onclick="toggle('forEach0')"><a name="forEach0Anchor">forEach</a></td><td>Matcher<ChildT> ChildMatcher</td></tr> <tr><td colspan="4" class="doc" id="forEach0"><pre>Matches AST nodes that have child AST nodes that match the provided matcher. @@ -1174,7 +2092,7 @@ matcher. Given void f() { if (true) { int x = 42; } } void g() { for (;;) { int x = 43; } } -expr(integerLiteral(hasAncsestor(ifStmt()))) matches 42, but not 43. +expr(integerLiteral(hasAncestor(ifStmt()))) matches 42, but not 43. Usable as: Any Matcher </pre></td></tr> @@ -1196,6 +2114,18 @@ Usable as: Any Matcher </pre></td></tr> +<tr><td>Matcher<*></td><td class="name" onclick="toggle('hasParent0')"><a name="hasParent0Anchor">hasParent</a></td><td>Matcher<ParentT> ParentMatcher</td></tr> +<tr><td colspan="4" class="doc" id="hasParent0"><pre>Matches AST nodes that have a parent that matches the provided +matcher. + +Given +void f() { for (;;) { int x = 42; if (true) { int x = 43; } } } +compoundStmt(hasParent(ifStmt())) matches "{ int x = 43; }". + +Usable as: Any Matcher +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ArraySubscriptExpr.html">ArraySubscriptExpr</a>></td><td class="name" onclick="toggle('hasBase0')"><a name="hasBase0Anchor">hasBase</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>> InnerMatcher</td></tr> <tr><td colspan="4" class="doc" id="hasBase0"><pre>Matches the base expression of an array subscript expression. 
@@ -1219,6 +2149,78 @@ arraySubscriptExpression(hasIndex(integerLiteral())) </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ArrayTypeLoc.html">ArrayTypeLoc</a>></td><td class="name" onclick="toggle('hasElementTypeLoc1')"><a name="hasElementTypeLoc1Anchor">hasElementTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td></tr> +<tr><td colspan="4" class="doc" id="hasElementTypeLoc1"><pre>Matches arrays and C99 complex types that have a specific element +type. + +Given + struct A {}; + A a[7]; + int b[7]; +arrayType(hasElementType(builtinType())) + matches "int b[7]" + +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ArrayType.html">ArrayType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ComplexType.html">ComplexType</a>> +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ArrayType.html">ArrayType</a>></td><td class="name" onclick="toggle('hasElementType1')"><a name="hasElementType1Anchor">hasElementType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td></tr> +<tr><td colspan="4" class="doc" id="hasElementType1"><pre>Matches arrays and C99 complex types that have a specific element +type. 
+ +Given + struct A {}; + A a[7]; + int b[7]; +arrayType(hasElementType(builtinType())) + matches "int b[7]" + +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ArrayType.html">ArrayType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ComplexType.html">ComplexType</a>> +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1AtomicTypeLoc.html">AtomicTypeLoc</a>></td><td class="name" onclick="toggle('hasValueTypeLoc0')"><a name="hasValueTypeLoc0Anchor">hasValueTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td></tr> +<tr><td colspan="4" class="doc" id="hasValueTypeLoc0"><pre>Matches atomic types with a specific value type. + +Given + _Atomic(int) i; + _Atomic(float) f; +atomicType(hasValueType(isInteger())) + matches "_Atomic(int) i" + +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1AtomicType.html">AtomicType</a>> +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1AtomicType.html">AtomicType</a>></td><td class="name" onclick="toggle('hasValueType0')"><a name="hasValueType0Anchor">hasValueType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td></tr> +<tr><td colspan="4" class="doc" id="hasValueType0"><pre>Matches atomic types with a specific value type. 
+ +Given + _Atomic(int) i; + _Atomic(float) f; +atomicType(hasValueType(isInteger())) + matches "_Atomic(int) i" + +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1AtomicType.html">AtomicType</a>> +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1AutoType.html">AutoType</a>></td><td class="name" onclick="toggle('hasDeducedType0')"><a name="hasDeducedType0Anchor">hasDeducedType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td></tr> +<tr><td colspan="4" class="doc" id="hasDeducedType0"><pre>Matches AutoType nodes where the deduced type is a specific type. + +Note: There is no TypeLoc for the deduced type and thus no +getDeducedLoc() matcher. + +Given + auto a = 1; + auto b = 2.0; +autoType(hasDeducedType(isInteger())) + matches "auto a" + +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1AutoType.html">AutoType</a>> +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1BinaryOperator.html">BinaryOperator</a>></td><td class="name" onclick="toggle('hasEitherOperand0')"><a name="hasEitherOperand0Anchor">hasEitherOperand</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>> InnerMatcher</td></tr> <tr><td colspan="4" class="doc" id="hasEitherOperand0"><pre>Matches if either the left hand side or the right hand side of a binary operator matches. 
@@ -1241,11 +2243,49 @@ Example matches b (matcher = binaryOperator(hasRHS())) </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>></td><td class="name" onclick="toggle('hasDeclaration0')"><a name="hasDeclaration0Anchor">hasDeclaration</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>> InnerMatcher</td></tr> -<tr><td colspan="4" class="doc" id="hasDeclaration0"><pre>Matches a type if the declaration of the type matches the given +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1BlockPointerTypeLoc.html">BlockPointerTypeLoc</a>></td><td class="name" onclick="toggle('pointeeLoc3')"><a name="pointeeLoc3Anchor">pointeeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td></tr> +<tr><td colspan="4" class="doc" id="pointeeLoc3"><pre>Narrows PointerType (and similar) matchers to those where the +pointee matches a given matcher. 
+ +Given + int *a; + int const *b; + float const *f; +pointerType(pointee(isConstQualified(), isInteger())) + matches "int const *b" + +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1BlockPointerType.html">BlockPointerType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberPointerType.html">MemberPointerType</a>>, + Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1PointerType.html">PointerType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ReferenceType.html">ReferenceType</a>> +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1BlockPointerType.html">BlockPointerType</a>></td><td class="name" onclick="toggle('pointee3')"><a name="pointee3Anchor">pointee</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td></tr> +<tr><td colspan="4" class="doc" id="pointee3"><pre>Narrows PointerType (and similar) matchers to those where the +pointee matches a given matcher. 
+ +Given + int *a; + int const *b; + float const *f; +pointerType(pointee(isConstQualified(), isInteger())) + matches "int const *b" + +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1BlockPointerType.html">BlockPointerType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberPointerType.html">MemberPointerType</a>>, + Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1PointerType.html">PointerType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ReferenceType.html">ReferenceType</a>> +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>></td><td class="name" onclick="toggle('hasDeclaration3')"><a name="hasDeclaration3Anchor">hasDeclaration</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>> InnerMatcher</td></tr> +<tr><td colspan="4" class="doc" id="hasDeclaration3"><pre>Matches a type if the declaration of the type matches the given matcher. -Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>> +In addition to being usable as Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypedefType.html">TypedefType</a>>, also usable as +Matcher<T> for any T supporting the getDecl() member function. e.g. various +subtypes of clang::Type. 
+ +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>>, + Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberExpr.html">MemberExpr</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypedefType.html">TypedefType</a>>, + Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TemplateSpecializationType.html">TemplateSpecializationType</a>> </pre></td></tr> @@ -1331,9 +2371,15 @@ Example matches A() in the last line </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXRecordDecl.html">CXXRecordDecl</a>></td><td class="name" onclick="toggle('isA0')"><a name="isA0Anchor">isA</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1NamedDecl.html">NamedDecl</a>> Base</td></tr> -<tr><td colspan="4" class="doc" id="isA0"><pre>Similar to isDerivedFrom(), but also matches classes that directly -match Base. +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXRecordDecl.html">CXXRecordDecl</a>></td><td class="name" onclick="toggle('hasMethod0')"><a name="hasMethod0Anchor">hasMethod</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXMethodDecl.html">CXXMethodDecl</a>> InnerMatcher</td></tr> +<tr><td colspan="4" class="doc" id="hasMethod0"><pre>Matches the first method of a class or struct that satisfies InnerMatcher. + +Given: + class A { void func(); }; + class B { void member(); }; + +recordDecl(hasMethod(hasName("func"))) matches the declaration of A +but not B. 
</pre></td></tr> @@ -1358,6 +2404,12 @@ In the following example, Bar matches isDerivedFrom(hasName("X")): </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXRecordDecl.html">CXXRecordDecl</a>></td><td class="name" onclick="toggle('isSameOrDerivedFrom0')"><a name="isSameOrDerivedFrom0Anchor">isSameOrDerivedFrom</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1NamedDecl.html">NamedDecl</a>> Base</td></tr> +<tr><td colspan="4" class="doc" id="isSameOrDerivedFrom0"><pre>Similar to isDerivedFrom(), but also matches classes that directly +match Base. +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>></td><td class="name" onclick="toggle('callee1')"><a name="callee1Anchor">callee</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>> InnerMatcher</td></tr> <tr><td colspan="4" class="doc" id="callee1"><pre>Matches if the call expression's callee's declaration matches the given matcher. 
@@ -1391,11 +2443,17 @@ Example matches y in x(y) </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>></td><td class="name" onclick="toggle('hasDeclaration1')"><a name="hasDeclaration1Anchor">hasDeclaration</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>> InnerMatcher</td></tr> -<tr><td colspan="4" class="doc" id="hasDeclaration1"><pre>Matches a type if the declaration of the type matches the given +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>></td><td class="name" onclick="toggle('hasDeclaration4')"><a name="hasDeclaration4Anchor">hasDeclaration</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>> InnerMatcher</td></tr> +<tr><td colspan="4" class="doc" id="hasDeclaration4"><pre>Matches a type if the declaration of the type matches the given matcher. -Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>> +In addition to being usable as Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypedefType.html">TypedefType</a>>, also usable as +Matcher<T> for any T supporting the getDecl() member function. e.g. various +subtypes of clang::Type. 
+ +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>>, + Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberExpr.html">MemberExpr</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypedefType.html">TypedefType</a>>, + Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TemplateSpecializationType.html">TemplateSpecializationType</a>> </pre></td></tr> @@ -1437,6 +2495,36 @@ classTemplateSpecializationDecl(hasTemplateArgument( </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ComplexTypeLoc.html">ComplexTypeLoc</a>></td><td class="name" onclick="toggle('hasElementTypeLoc0')"><a name="hasElementTypeLoc0Anchor">hasElementTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td></tr> +<tr><td colspan="4" class="doc" id="hasElementTypeLoc0"><pre>Matches arrays and C99 complex types that have a specific element +type. + +Given + struct A {}; + A a[7]; + int b[7]; +arrayType(hasElementType(builtinType())) + matches "int b[7]" + +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ArrayType.html">ArrayType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ComplexType.html">ComplexType</a>> +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ComplexType.html">ComplexType</a>></td><td class="name" onclick="toggle('hasElementType0')"><a name="hasElementType0Anchor">hasElementType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td></tr> +<tr><td colspan="4" class="doc" id="hasElementType0"><pre>Matches arrays and C99 complex types that have a specific element +type. 
+ +Given + struct A {}; + A a[7]; + int b[7]; +arrayType(hasElementType(builtinType())) + matches "int b[7]" + +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ArrayType.html">ArrayType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ComplexType.html">ComplexType</a>> +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CompoundStmt.html">CompoundStmt</a>></td><td class="name" onclick="toggle('hasAnySubstatement0')"><a name="hasAnySubstatement0Anchor">hasAnySubstatement</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>> InnerMatcher</td></tr> <tr><td colspan="4" class="doc" id="hasAnySubstatement0"><pre>Matches compound statements where at least one substatement matches a given matcher. @@ -1534,6 +2622,22 @@ declStmt(hasSingleDecl(anything())) </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>></td><td class="name" onclick="toggle('hasDeclContext0')"><a name="hasDeclContext0Anchor">hasDeclContext</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>> InnerMatcher</td></tr> +<tr><td colspan="4" class="doc" id="hasDeclContext0"><pre>Matches declarations whose declaration context, interpreted as a +Decl, matches InnerMatcher. + +Given + namespace N { + namespace M { + class D {}; + } + } + +recordDecl(hasDeclContext(namedDecl(hasName("M")))) matches the +declaration of class D. +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1DoStmt.html">DoStmt</a>></td><td class="name" onclick="toggle('hasBody0')"><a name="hasBody0Anchor">hasBody</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>> InnerMatcher</td></tr> <tr><td colspan="4" class="doc" id="hasBody0"><pre>Matches a 'for', 'while', or 'do while' statement that has a given body. 
@@ -1556,6 +2660,40 @@ Example matches true (matcher = hasCondition(boolLiteral(equals(true)))) </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ElaboratedType.html">ElaboratedType</a>></td><td class="name" onclick="toggle('hasQualifier0')"><a name="hasQualifier0Anchor">hasQualifier</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1NestedNameSpecifier.html">NestedNameSpecifier</a>> InnerMatcher</td></tr> +<tr><td colspan="4" class="doc" id="hasQualifier0"><pre>Matches ElaboratedTypes whose qualifier, a NestedNameSpecifier, +matches InnerMatcher if the qualifier exists. + +Given + namespace N { + namespace M { + class D {}; + } + } + N::M::D d; + +elaboratedType(hasQualifier(hasPrefix(specifiesNamespace(hasName("N"))))) +matches the type of the variable declaration of d. +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ElaboratedType.html">ElaboratedType</a>></td><td class="name" onclick="toggle('namesType0')"><a name="namesType0Anchor">namesType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>> InnerMatcher</td></tr> +<tr><td colspan="4" class="doc" id="namesType0"><pre>Matches ElaboratedTypes whose named type matches InnerMatcher. + +Given + namespace N { + namespace M { + class D {}; + } + } + N::M::D d; + +elaboratedType(namesType(recordType( +hasDeclaration(namedDecl(hasName("D")))))) matches the type of the variable +declaration of d. 
+</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ExplicitCastExpr.html">ExplicitCastExpr</a>></td><td class="name" onclick="toggle('hasDestinationType0')"><a name="hasDestinationType0Anchor">hasDestinationType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>> InnerMatcher</td></tr> <tr><td colspan="4" class="doc" id="hasDestinationType0"><pre>Matches casts whose destination type matches a given matcher. @@ -1753,6 +2891,20 @@ FIXME: Unit test this matcher </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberExpr.html">MemberExpr</a>></td><td class="name" onclick="toggle('hasDeclaration2')"><a name="hasDeclaration2Anchor">hasDeclaration</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>> InnerMatcher</td></tr> +<tr><td colspan="4" class="doc" id="hasDeclaration2"><pre>Matches a type if the declaration of the type matches the given +matcher. + +In addition to being usable as Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypedefType.html">TypedefType</a>>, also usable as +Matcher<T> for any T supporting the getDecl() member function. e.g. various +subtypes of clang::Type. 
+ +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>>, + Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberExpr.html">MemberExpr</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypedefType.html">TypedefType</a>>, + Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TemplateSpecializationType.html">TemplateSpecializationType</a>> +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberExpr.html">MemberExpr</a>></td><td class="name" onclick="toggle('hasObjectExpression0')"><a name="hasObjectExpression0Anchor">hasObjectExpression</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>> InnerMatcher</td></tr> <tr><td colspan="4" class="doc" id="hasObjectExpression0"><pre>Matches a member expression where the object expression is matched by a given matcher. 
@@ -1781,11 +2933,173 @@ memberExpr(member(hasName("first"))) </pre></td></tr> -<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>></td><td class="name" onclick="toggle('hasDeclaration2')"><a name="hasDeclaration2Anchor">hasDeclaration</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>> InnerMatcher</td></tr> -<tr><td colspan="4" class="doc" id="hasDeclaration2"><pre>Matches a type if the declaration of the type matches the given +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberPointerTypeLoc.html">MemberPointerTypeLoc</a>></td><td class="name" onclick="toggle('pointeeLoc2')"><a name="pointeeLoc2Anchor">pointeeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td></tr> +<tr><td colspan="4" class="doc" id="pointeeLoc2"><pre>Narrows PointerType (and similar) matchers to those where the +pointee matches a given matcher. + +Given + int *a; + int const *b; + float const *f; +pointerType(pointee(isConstQualified(), isInteger())) + matches "int const *b" + +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1BlockPointerType.html">BlockPointerType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberPointerType.html">MemberPointerType</a>>, + Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1PointerType.html">PointerType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ReferenceType.html">ReferenceType</a>> +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberPointerType.html">MemberPointerType</a>></td><td class="name" onclick="toggle('pointee2')"><a name="pointee2Anchor">pointee</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td></tr> +<tr><td colspan="4" class="doc" id="pointee2"><pre>Narrows PointerType (and similar) matchers to those where the 
+pointee matches a given matcher. + +Given + int *a; + int const *b; + float const *f; +pointerType(pointee(isConstQualified(), isInteger())) + matches "int const *b" + +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1BlockPointerType.html">BlockPointerType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberPointerType.html">MemberPointerType</a>>, + Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1PointerType.html">PointerType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ReferenceType.html">ReferenceType</a>> +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1NestedNameSpecifierLoc.html">NestedNameSpecifierLoc</a>></td><td class="name" onclick="toggle('hasPrefix1')"><a name="hasPrefix1Anchor">hasPrefix</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1NestedNameSpecifierLoc.html">NestedNameSpecifierLoc</a>> InnerMatcher</td></tr> +<tr><td colspan="4" class="doc" id="hasPrefix1"><pre>Matches on the prefix of a NestedNameSpecifierLoc. + +Given + struct A { struct B { struct C {}; }; }; + A::B::C c; +nestedNameSpecifierLoc(hasPrefix(loc(specifiesType(asString("struct A"))))) + matches "A::" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1NestedNameSpecifierLoc.html">NestedNameSpecifierLoc</a>></td><td class="name" onclick="toggle('loc1')"><a name="loc1Anchor">loc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1NestedNameSpecifier.html">NestedNameSpecifier</a>> InnerMatcher</td></tr> +<tr><td colspan="4" class="doc" id="loc1"><pre>Matches NestedNameSpecifierLocs for which the given inner +NestedNameSpecifier-matcher matches. 
+</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1NestedNameSpecifierLoc.html">NestedNameSpecifierLoc</a>></td><td class="name" onclick="toggle('specifiesTypeLoc0')"><a name="specifiesTypeLoc0Anchor">specifiesTypeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>> InnerMatcher</td></tr> +<tr><td colspan="4" class="doc" id="specifiesTypeLoc0"><pre>Matches nested name specifier locs that specify a type matching the +given TypeLoc. + +Given + struct A { struct B { struct C {}; }; }; + A::B::C c; +nestedNameSpecifierLoc(specifiesTypeLoc(loc(type( + hasDeclaration(recordDecl(hasName("A"))))))) + matches "A::" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1NestedNameSpecifier.html">NestedNameSpecifier</a>></td><td class="name" onclick="toggle('hasPrefix0')"><a name="hasPrefix0Anchor">hasPrefix</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1NestedNameSpecifier.html">NestedNameSpecifier</a>> InnerMatcher</td></tr> +<tr><td colspan="4" class="doc" id="hasPrefix0"><pre>Matches on the prefix of a NestedNameSpecifier. + +Given + struct A { struct B { struct C {}; }; }; + A::B::C c; +nestedNameSpecifier(hasPrefix(specifiesType(asString("struct A")))) + matches "A::" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1NestedNameSpecifier.html">NestedNameSpecifier</a>></td><td class="name" onclick="toggle('specifiesNamespace0')"><a name="specifiesNamespace0Anchor">specifiesNamespace</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1NamespaceDecl.html">NamespaceDecl</a>> InnerMatcher</td></tr> +<tr><td colspan="4" class="doc" id="specifiesNamespace0"><pre>Matches nested name specifiers that specify a namespace matching the +given namespace matcher. 
+ +Given + namespace ns { struct A {}; } + ns::A a; +nestedNameSpecifier(specifiesNamespace(hasName("ns"))) + matches "ns::" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1NestedNameSpecifier.html">NestedNameSpecifier</a>></td><td class="name" onclick="toggle('specifiesType0')"><a name="specifiesType0Anchor">specifiesType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>> InnerMatcher</td></tr> +<tr><td colspan="4" class="doc" id="specifiesType0"><pre>Matches nested name specifiers that specify a type matching the +given QualType matcher without qualifiers. + +Given + struct A { struct B { struct C {}; }; }; + A::B::C c; +nestedNameSpecifier(specifiesType(hasDeclaration(recordDecl(hasName("A"))))) + matches "A::" +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ParenType.html">ParenType</a>></td><td class="name" onclick="toggle('innerType0')"><a name="innerType0Anchor">innerType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td></tr> +<tr><td colspan="4" class="doc" id="innerType0"><pre>Matches ParenType nodes where the inner type is a specific type. + +Given + int (*ptr_to_array)[4]; + int (*ptr_to_func)(int); + +varDecl(hasType(pointsTo(parenType(innerType(functionType()))))) matches +ptr_to_func but not ptr_to_array. 
+ +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ParenType.html">ParenType</a>> +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1PointerTypeLoc.html">PointerTypeLoc</a>></td><td class="name" onclick="toggle('pointeeLoc1')"><a name="pointeeLoc1Anchor">pointeeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td></tr> +<tr><td colspan="4" class="doc" id="pointeeLoc1"><pre>Narrows PointerType (and similar) matchers to those where the +pointee matches a given matcher. + +Given + int *a; + int const *b; + float const *f; +pointerType(pointee(isConstQualified(), isInteger())) + matches "int const *b" + +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1BlockPointerType.html">BlockPointerType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberPointerType.html">MemberPointerType</a>>, + Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1PointerType.html">PointerType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ReferenceType.html">ReferenceType</a>> +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1PointerType.html">PointerType</a>></td><td class="name" onclick="toggle('pointee1')"><a name="pointee1Anchor">pointee</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td></tr> +<tr><td colspan="4" class="doc" id="pointee1"><pre>Narrows PointerType (and similar) matchers to those where the +pointee matches a given matcher. 
+ +Given + int *a; + int const *b; + float const *f; +pointerType(pointee(isConstQualified(), isInteger())) + matches "int const *b" + +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1BlockPointerType.html">BlockPointerType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberPointerType.html">MemberPointerType</a>>, + Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1PointerType.html">PointerType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ReferenceType.html">ReferenceType</a>> +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>></td><td class="name" onclick="toggle('hasCanonicalType0')"><a name="hasCanonicalType0Anchor">hasCanonicalType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>> InnerMatcher</td></tr> +<tr><td colspan="4" class="doc" id="hasCanonicalType0"><pre>Matches QualTypes whose canonical type matches InnerMatcher. + +Given: + typedef int &int_ref; + int a; + int_ref b = a; + +varDecl(hasType(qualType(referenceType()))) will not match the +declaration of b but varDecl(hasType(qualType(hasCanonicalType(referenceType())))) does. +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>></td><td class="name" onclick="toggle('hasDeclaration5')"><a name="hasDeclaration5Anchor">hasDeclaration</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>> InnerMatcher</td></tr> +<tr><td colspan="4" class="doc" id="hasDeclaration5"><pre>Matches a type if the declaration of the type matches the given matcher. 
-Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>> +In addition to being usable as Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypedefType.html">TypedefType</a>>, also usable as +Matcher<T> for any T supporting the getDecl() member function. e.g. various +subtypes of clang::Type. + +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>>, + Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberExpr.html">MemberExpr</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypedefType.html">TypedefType</a>>, + Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TemplateSpecializationType.html">TemplateSpecializationType</a>> </pre></td></tr> @@ -1799,6 +3113,38 @@ Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualTy </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ReferenceTypeLoc.html">ReferenceTypeLoc</a>></td><td class="name" onclick="toggle('pointeeLoc0')"><a name="pointeeLoc0Anchor">pointeeLoc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td></tr> +<tr><td colspan="4" class="doc" id="pointeeLoc0"><pre>Narrows PointerType (and similar) matchers to those where the +pointee matches a given matcher. 
+ +Given + int *a; + int const *b; + float const *f; +pointerType(pointee(isConstQualified(), isInteger())) + matches "int const *b" + +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1BlockPointerType.html">BlockPointerType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberPointerType.html">MemberPointerType</a>>, + Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1PointerType.html">PointerType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ReferenceType.html">ReferenceType</a>> +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ReferenceType.html">ReferenceType</a>></td><td class="name" onclick="toggle('pointee0')"><a name="pointee0Anchor">pointee</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>></td></tr> +<tr><td colspan="4" class="doc" id="pointee0"><pre>Narrows PointerType (and similar) matchers to those where the +pointee matches a given matcher. 
+ +Given + int *a; + int const *b; + float const *f; +pointerType(pointee(isConstQualified(), isInteger())) + matches "int const *b" + +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1BlockPointerType.html">BlockPointerType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberPointerType.html">MemberPointerType</a>>, + Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1PointerType.html">PointerType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1ReferenceType.html">ReferenceType</a>> +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>></td><td class="name" onclick="toggle('alignOfExpr0')"><a name="alignOfExpr0Anchor">alignOfExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1UnaryExprOrTypeTraitExpr.html">UnaryExprOrTypeTraitExpr</a>> InnerMatcher</td></tr> <tr><td colspan="4" class="doc" id="alignOfExpr0"><pre>Same as unaryExprOrTypeTraitExpr, but only matching alignof. @@ -1838,6 +3184,40 @@ classTemplateSpecializationDecl(hasAnyTemplateArgument( </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TemplateSpecializationType.html">TemplateSpecializationType</a>></td><td class="name" onclick="toggle('hasDeclaration0')"><a name="hasDeclaration0Anchor">hasDeclaration</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>> InnerMatcher</td></tr> +<tr><td colspan="4" class="doc" id="hasDeclaration0"><pre>Matches a type if the declaration of the type matches the given +matcher. + +In addition to being usable as Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypedefType.html">TypedefType</a>>, also usable as +Matcher<T> for any T supporting the getDecl() member function. e.g. various +subtypes of clang::Type. 
+ +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>>, + Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberExpr.html">MemberExpr</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypedefType.html">TypedefType</a>>, + Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TemplateSpecializationType.html">TemplateSpecializationType</a>> +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>></td><td class="name" onclick="toggle('loc0')"><a name="loc0Anchor">loc</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>> InnerMatcher</td></tr> +<tr><td colspan="4" class="doc" id="loc0"><pre>Matches TypeLocs for which the given inner +QualType-matcher matches. +</pre></td></tr> + + +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypedefType.html">TypedefType</a>></td><td class="name" onclick="toggle('hasDeclaration1')"><a name="hasDeclaration1Anchor">hasDeclaration</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>> InnerMatcher</td></tr> +<tr><td colspan="4" class="doc" id="hasDeclaration1"><pre>Matches a type if the declaration of the type matches the given +matcher. + +In addition to being usable as Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypedefType.html">TypedefType</a>>, also usable as +Matcher<T> for any T supporting the getDecl() member function. e.g. various +subtypes of clang::Type. 
+ +Usable as: Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CallExpr.html">CallExpr</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>>, + Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1MemberExpr.html">MemberExpr</a>>, Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TypedefType.html">TypedefType</a>>, + Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1TemplateSpecializationType.html">TemplateSpecializationType</a>> +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1UnaryExprOrTypeTraitExpr.html">UnaryExprOrTypeTraitExpr</a>></td><td class="name" onclick="toggle('hasArgumentOfType0')"><a name="hasArgumentOfType0Anchor">hasArgumentOfType</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1QualType.html">QualType</a>> InnerMatcher</td></tr> <tr><td colspan="4" class="doc" id="hasArgumentOfType0"><pre>Matches unary expressions that have a specific type of argument. @@ -1851,7 +3231,7 @@ unaryExprOrTypeTraitExpr(hasArgumentOfType(asString("int")) <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1UnaryOperator.html">UnaryOperator</a>></td><td class="name" onclick="toggle('hasUnaryOperand0')"><a name="hasUnaryOperand0Anchor">hasUnaryOperand</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>> InnerMatcher</td></tr> <tr><td colspan="4" class="doc" id="hasUnaryOperand0"><pre>Matches if the operand of a unary operator matches. 
-Example matches true (matcher = hasOperand(boolLiteral(equals(true)))) +Example matches true (matcher = hasUnaryOperand(boolLiteral(equals(true)))) !true </pre></td></tr> @@ -1907,6 +3287,20 @@ Example matches x (matcher = varDecl(hasInitializer(callExpr()))) </pre></td></tr> +<tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1VariableArrayType.html">VariableArrayType</a>></td><td class="name" onclick="toggle('hasSizeExpr0')"><a name="hasSizeExpr0Anchor">hasSizeExpr</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>> InnerMatcher</td></tr> +<tr><td colspan="4" class="doc" id="hasSizeExpr0"><pre>Matches VariableArrayType nodes that have a specific size +expression. + +Given + void f(int b) { + int a[b]; + } +variableArrayType(hasSizeExpr(ignoringImpCasts(declRefExpr(to( + varDecl(hasName("b"))))))) + matches "int a[b]" +</pre></td></tr> + + <tr><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1WhileStmt.html">WhileStmt</a>></td><td class="name" onclick="toggle('hasBody2')"><a name="hasBody2Anchor">hasBody</a></td><td>Matcher<<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>> InnerMatcher</td></tr> <tr><td colspan="4" class="doc" id="hasBody2"><pre>Matches a 'for', 'while', or 'do while' statement that has a given body. diff --git a/docs/LibASTMatchersTutorial.rst b/docs/LibASTMatchersTutorial.rst new file mode 100644 index 0000000..ba568e3 --- /dev/null +++ b/docs/LibASTMatchersTutorial.rst @@ -0,0 +1,538 @@ +=============================================================== +Tutorial for building tools using LibTooling and LibASTMatchers +=============================================================== + +This document is intended to show how to build a useful source-to-source +translation tool based on Clang's `LibTooling <LibTooling.html>`_. 
It is +explicitly aimed at people who are new to Clang, so all you should need +is a working knowledge of C++ and the command line. + +In order to work on the compiler, you need some basic knowledge of the +abstract syntax tree (AST). To this end, the reader is encouraged to +skim the :doc:`Introduction to the Clang +AST <IntroductionToTheClangAST>`. + +Step 0: Obtaining Clang +======================= + +As Clang is part of the LLVM project, you'll need to download LLVM's +source code first. Both Clang and LLVM are maintained as Subversion +repositories, but we'll be accessing them through the git mirror. For +further information, see the `getting started +guide <http://llvm.org/docs/GettingStarted.html>`_. + +.. code-block:: console + + mkdir ~/clang-llvm && cd ~/clang-llvm + git clone http://llvm.org/git/llvm.git + cd llvm/tools + git clone http://llvm.org/git/clang.git + +Next you need to obtain the CMake build system and Ninja build tool. You +may already have CMake installed, but current binary versions of CMake +aren't built with Ninja support. + +.. code-block:: console + + cd ~/clang-llvm + git clone https://github.com/martine/ninja.git + cd ninja + git checkout release + ./bootstrap.py + sudo cp ninja /usr/bin/ + + cd ~/clang-llvm + git clone git://cmake.org/stage/cmake.git + cd cmake + git checkout next + ./bootstrap + make + sudo make install + +Okay. Now we'll build Clang! + +.. code-block:: console + + cd ~/clang-llvm + mkdir build && cd build + cmake -G Ninja ../llvm -DLLVM_BUILD_TESTS=ON # Enable tests; default is off. + ninja + ninja check # Test LLVM only. + ninja clang-test # Test Clang only. + ninja install + +And we're live. + +All of the tests should pass, though there is a (very) small chance that +you can catch LLVM and Clang out of sync. Running ``'git svn rebase'`` +in both the llvm and clang directories should fix any problems. + +Finally, we want to set Clang as its own compiler. + +.. 
code-block:: console + + cd ~/clang-llvm/build + ccmake ../llvm + +The second command will bring up a GUI for configuring Clang. You need +to set the entry for ``CMAKE_CXX_COMPILER``. Press ``'t'`` to turn on +advanced mode. Scroll down to ``CMAKE_CXX_COMPILER``, and set it to +``/usr/bin/clang++``, or wherever you installed it. Press ``'c'`` to +configure, then ``'g'`` to generate CMake's files. + +Finally, run ninja one last time, and you're done. + +Step 1: Create a ClangTool +========================== + +Now that we have enough background knowledge, it's time to create the +simplest productive ClangTool in existence: a syntax checker. While this +already exists as ``clang-check``, it's important to understand what's +going on. + +First, we'll need to create a new directory for our tool and tell CMake +that it exists. As this is not going to be a core clang tool, it will +live in the ``tools/extra`` repository. + +.. code-block:: console + + cd ~/clang-llvm/llvm/tools/clang + mkdir tools/extra/loop-convert + echo 'add_subdirectory(loop-convert)' >> tools/extra/CMakeLists.txt + vim tools/extra/loop-convert/CMakeLists.txt + +CMakeLists.txt should have the following contents: + +:: + + set(LLVM_LINK_COMPONENTS support) + set(LLVM_USED_LIBS clangTooling clangBasic clangAST) + + add_clang_executable(loop-convert + LoopConvert.cpp + ) + target_link_libraries(loop-convert + clangTooling + clangBasic + clangASTMatchers + ) + +With that done, Ninja will be able to compile our tool. Let's give it +something to compile! Put the following into +``tools/extra/loop-convert/LoopConvert.cpp``. A detailed explanation of +why the different parts are needed can be found in the `LibTooling +documentation <LibTooling.html>`_. + +.. code-block:: c++ + + // Declares clang::SyntaxOnlyAction. + #include "clang/Frontend/FrontendActions.h" + #include "clang/Tooling/CommonOptionsParser.h" + #include "clang/Tooling/Tooling.h" + // Declares llvm::cl::extrahelp. 
+ #include "llvm/Support/CommandLine.h" + + using namespace clang::tooling; + using namespace llvm; + + // CommonOptionsParser declares HelpMessage with a description of the common + // command-line options related to the compilation database and input files. + // It's nice to have this help message in all tools. + static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage); + + // A help message for this specific tool can be added afterwards. + static cl::extrahelp MoreHelp("\nMore help text..."); + + int main(int argc, const char **argv) { + CommonOptionsParser OptionsParser(argc, argv); + ClangTool Tool(OptionsParser.getCompilations(), + OptionsParser.getSourcePathList()); + return Tool.run(newFrontendActionFactory<clang::SyntaxOnlyAction>()); + } + +And that's it! You can compile our new tool by running ninja from the +``build`` directory. + +.. code-block:: console + + cd ~/clang-llvm/build + ninja + +You should now be able to run the syntax checker, which is located in +``~/clang-llvm/build/bin``, on any source file. Try it! + +.. code-block:: console + + echo "int main() { return 0; }" > test.cpp + bin/loop-convert test.cpp -- + +Note the two dashes after we specify the source file. The additional +options for the compiler are passed after the dashes rather than loading +them from a compilation database - there just aren't any options needed +right now. + +Intermezzo: Learn AST matcher basics +==================================== + +Clang recently introduced the :doc:`ASTMatcher +library <LibASTMatchers>` to provide a simple, powerful, and +concise way to describe specific patterns in the AST. Implemented as a +DSL powered by macros and templates (see +`ASTMatchers.h <../doxygen/ASTMatchers_8h_source.html>`_ if you're +curious), matchers offer the feel of algebraic data types common to +functional programming languages. + +For example, suppose you wanted to examine only binary operators. 
There +is a matcher to do exactly that, conveniently named ``binaryOperator``. +I'll give you one guess what this matcher does: + +.. code-block:: c++ + + binaryOperator(hasOperatorName("+"), hasLHS(integerLiteral(equals(0)))) + +Shockingly, it will match against addition expressions whose left hand +side is exactly the literal 0. It will not match against other forms of +0, such as ``'\0'`` or ``NULL``, but it will match against macros that +expand to 0. The matcher will also not match against calls to the +overloaded operator ``'+'``, as there is a separate ``operatorCallExpr`` +matcher to handle overloaded operators. + +There are AST matchers to match all the different nodes of the AST, +narrowing matchers to only match AST nodes fulfilling specific criteria, +and traversal matchers to get from one kind of AST node to another. For +a complete list of AST matchers, take a look at the `AST Matcher +References <LibASTMatchersReference.html>`_. + +All matchers that are nouns describe entities in the AST and can be +bound, so that they can be referred to whenever a match is found. To do +so, simply call the method ``bind`` on these matchers, e.g.: + +.. code-block:: c++ + + variable(hasType(isInteger())).bind("intvar") + +Step 2: Using AST matchers +========================== + +Okay, on to using matchers for real. Let's start by defining a matcher +which will capture all ``for`` statements that define a new variable +initialized to zero. Let's start with matching all ``for`` loops: + +.. code-block:: c++ + + forStmt() + +Next, we want to specify that a single variable is declared in the first +portion of the loop, so we can extend the matcher to + +.. code-block:: c++ + + forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl())))) + +Finally, we can add the condition that the variable is initialized to +zero. + +.. 
code-block:: c++ + + forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl( + hasInitializer(integerLiteral(equals(0)))))))) + +It is fairly easy to read and understand the matcher definition ("match +loops whose init portion declares a single variable which is initialized +to the integer literal 0"), but deciding that every piece is necessary +is more difficult. Note that this matcher will not match loops whose +variables are initialized to ``'\0'``, ``0.0``, ``NULL``, or any form of +zero besides the integer 0. + +The last step is giving the matcher a name and binding the ``ForStmt`` +as we will want to do something with it: + +.. code-block:: c++ + + StatementMatcher LoopMatcher = + forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl( + hasInitializer(integerLiteral(equals(0)))))))).bind("forLoop"); + +Once you have defined your matchers, you will need to add a little more +scaffolding in order to run them. Matchers are paired with a +``MatchCallback`` and registered with a ``MatchFinder`` object, then run +from a ``ClangTool``. More code! + +Add the following to ``LoopConvert.cpp``: + +.. code-block:: c++ + + #include "clang/ASTMatchers/ASTMatchers.h" + #include "clang/ASTMatchers/ASTMatchFinder.h" + + using namespace clang; + using namespace clang::ast_matchers; + + StatementMatcher LoopMatcher = + forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl( + hasInitializer(integerLiteral(equals(0)))))))).bind("forLoop"); + + class LoopPrinter : public MatchFinder::MatchCallback { + public : + virtual void run(const MatchFinder::MatchResult &Result) { + if (const ForStmt *FS = Result.Nodes.getNodeAs<clang::ForStmt>("forLoop")) + FS->dump(); + } + }; + +And change ``main()`` to: + +.. 
code-block:: c++ + + int main(int argc, const char **argv) { + CommonOptionsParser OptionsParser(argc, argv); + ClangTool Tool(OptionsParser.getCompilations(), + OptionsParser.getSourcePathList()); + + LoopPrinter Printer; + MatchFinder Finder; + Finder.addMatcher(LoopMatcher, &Printer); + + return Tool.run(newFrontendActionFactory(&Finder)); + } + +Now, you should be able to recompile and run the code to discover for +loops. Create a new file with a few examples, and test out our new +handiwork: + +.. code-block:: console + + cd ~/clang-llvm/llvm/llvm_build/ + ninja loop-convert + vim ~/test-files/simple-loops.cc + bin/loop-convert ~/test-files/simple-loops.cc + +Step 3.5: More Complicated Matchers +=================================== + +Our simple matcher is capable of discovering for loops, but we would +still need to filter out many more ourselves. We can do a good portion +of the remaining work with some cleverly chosen matchers, but first we +need to decide exactly which properties we want to allow. + +How can we characterize for loops over arrays which would be eligible +for translation to range-based syntax? Range based loops over arrays of +size ``N`` that: + +- start at index ``0`` +- iterate consecutively +- end at index ``N-1`` + +We already check for (1), so all we need to add is a check to the loop's +condition to ensure that the loop's index variable is compared against +``N`` and another check to ensure that the increment step just +increments this same variable. The matcher for (2) is straightforward: +require a pre- or post-increment of the same variable declared in the +init portion. + +Unfortunately, such a matcher is impossible to write. Matchers contain +no logic for comparing two arbitrary AST nodes and determining whether +or not they are equal, so the best we can do is matching more than we +would like to allow, and punting extra comparisons to the callback. + +In any case, we can start building this sub-matcher. 
We can require that +the increment step be a unary increment like this: + +.. code-block:: c++ + + hasIncrement(unaryOperator(hasOperatorName("++"))) + +Specifying what is incremented introduces another quirk of Clang's AST: +Usages of variables are represented as ``DeclRefExpr``'s ("declaration +reference expressions") because they are expressions which refer to +variable declarations. To find a ``unaryOperator`` that refers to a +specific declaration, we can simply add a second condition to it: + +.. code-block:: c++ + + hasIncrement(unaryOperator( + hasOperatorName("++"), + hasUnaryOperand(declRefExpr()))) + +Furthermore, we can restrict our matcher to only match if the +incremented variable is an integer: + +.. code-block:: c++ + + hasIncrement(unaryOperator( + hasOperatorName("++"), + hasUnaryOperand(declRefExpr(to(varDecl(hasType(isInteger()))))))) + +And the last step will be to attach an identifier to this variable, so +that we can retrieve it in the callback: + +.. code-block:: c++ + + hasIncrement(unaryOperator( + hasOperatorName("++"), + hasUnaryOperand(declRefExpr(to( + varDecl(hasType(isInteger())).bind("incrementVariable")))))) + +We can add this code to the definition of ``LoopMatcher`` and make sure +that our program, outfitted with the new matcher, only prints out loops +that declare a single variable initialized to zero and have an increment +step consisting of a unary increment of some variable. + +Now, we just need to add a matcher to check if the condition part of the +``for`` loop compares a variable against the size of the array. There is +only one problem - we don't know which array we're iterating over +without looking at the body of the loop! We are again restricted to +approximating the result we want with matchers, filling in the details +in the callback. So we start with: + +.. 
code-block:: c++ + + hasCondition(binaryOperator(hasOperatorName("<"))) + +It makes sense to ensure that the left-hand side is a reference to a +variable, and that the right-hand side has integer type. + +.. code-block:: c++ + + hasCondition(binaryOperator( + hasOperatorName("<"), + hasLHS(declRefExpr(to(varDecl(hasType(isInteger()))))), + hasRHS(expr(hasType(isInteger()))))) + +Why? Because it doesn't work. Of the three loops provided in +``test-files/simple.cpp``, zero of them have a matching condition. A +quick look at the AST dump of the first for loop, produced by the +previous iteration of loop-convert, shows us the answer: + +:: + + (ForStmt 0x173b240 + (DeclStmt 0x173afc8 + 0x173af50 "int i = + (IntegerLiteral 0x173afa8 'int' 0)") + <<<NULL>>> + (BinaryOperator 0x173b060 '_Bool' '<' + (ImplicitCastExpr 0x173b030 'int' + (DeclRefExpr 0x173afe0 'int' lvalue Var 0x173af50 'i' 'int')) + (ImplicitCastExpr 0x173b048 'int' + (DeclRefExpr 0x173b008 'const int' lvalue Var 0x170fa80 'N' 'const int'))) + (UnaryOperator 0x173b0b0 'int' lvalue prefix '++' + (DeclRefExpr 0x173b088 'int' lvalue Var 0x173af50 'i' 'int')) + (CompoundStatement … + +We already know that the declaration and increments both match, or this +loop wouldn't have been dumped. The culprit lies in the implicit cast +applied to the first operand (i.e. the LHS) of the less-than operator, +an L-value to R-value conversion applied to the expression referencing +``i``. Thankfully, the matcher library offers a solution to this problem +in the form of ``ignoringParenImpCasts``, which instructs the matcher to +ignore implicit casts and parentheses before continuing to match. +Adjusting the condition operator will restore the desired match. + +.. 
code-block:: c++ + + hasCondition(binaryOperator( + hasOperatorName("<"), + hasLHS(ignoringParenImpCasts(declRefExpr( + to(varDecl(hasType(isInteger())))))), + hasRHS(expr(hasType(isInteger()))))) + +After adding binds to the expressions we wished to capture and +extracting the identifier strings into variables, we have array-step-2 +completed. + +Step 4: Retrieving Matched Nodes +================================ + +So far, the matcher callback isn't very interesting: it just dumps the +loop's AST. At some point, we will need to make changes to the input +source code. Next, we'll work on using the nodes we bound in the +previous step. + +The ``MatchFinder::run()`` callback takes a +``MatchFinder::MatchResult&`` as its parameter. We're most interested in +its ``Context`` and ``Nodes`` members. Clang uses the ``ASTContext`` +class to represent contextual information about the AST, as the name +implies, though the most functionally important detail is that several +operations require an ``ASTContext*`` parameter. More immediately useful +is the set of matched nodes, and how we retrieve them. + +Since we bind three variables (identified by ConditionVarName, +InitVarName, and IncrementVarName), we can obtain the matched nodes by +using the ``getNodeAs()`` member function. + +In ``LoopActions.cpp``: + +.. code-block:: c++ + + #include "clang/AST/ASTContext.h" + + void LoopPrinter::run(const MatchFinder::MatchResult &Result) { + ASTContext *Context = Result.Context; + const ForStmt *FS = Result.Nodes.getStmtAs<ForStmt>(LoopName); + // We do not want to convert header files! 
+ if (!FS || !Context->getSourceManager().isFromMainFile(FS->getForLoc())) + return; + const VarDecl *IncVar = Result.Nodes.getNodeAs<VarDecl>(IncrementVarName); + const VarDecl *CondVar = Result.Nodes.getNodeAs<VarDecl>(ConditionVarName); + const VarDecl *InitVar = Result.Nodes.getNodeAs<VarDecl>(InitVarName); + +Now that we have the three variables, represented by their respective +declarations, let's make sure that they're all the same, using a helper +function I call ``areSameVariable()``. + +.. code-block:: c++ + + if (!areSameVariable(IncVar, CondVar) || !areSameVariable(IncVar, InitVar)) + return; + llvm::outs() << "Potential array-based loop discovered.\n"; + } + +If execution reaches the end of ``LoopPrinter::run()``, we know that the +loop shell looks like + +.. code-block:: c++ + + for (int i= 0; i < expr(); ++i) { ... } + +For now, we will just print a message explaining that we found a loop. +The next section will deal with recursively traversing the AST to +discover all changes needed. + +As a side note, here is the implementation of ``areSameVariable``. Clang +associates a ``VarDecl`` with each variable to represent the variable's +declaration. Since the "canonical" form of each declaration is unique by +address, all we need to do is make sure neither ``ValueDecl`` (base +class of ``VarDecl``) is ``NULL`` and compare the canonical Decls. + +.. code-block:: c++ + + static bool areSameVariable(const ValueDecl *First, const ValueDecl *Second) { + return First && Second && + First->getCanonicalDecl() == Second->getCanonicalDecl(); + } + +It's not as trivial to test if two expressions are the same, though +Clang has already done the hard work for us by providing a way to +canonicalize expressions: + +.. 
code-block:: c++ + + static bool areSameExpr(ASTContext *Context, const Expr *First, + const Expr *Second) { + if (!First || !Second) + return false; + llvm::FoldingSetNodeID FirstID, SecondID; + First->Profile(FirstID, *Context, true); + Second->Profile(SecondID, *Context, true); + return FirstID == SecondID; + } + +This code relies on the comparison between two +``llvm::FoldingSetNodeIDs``. As the documentation for +``Stmt::Profile()`` indicates, the ``Profile()`` member function builds +a description of a node in the AST, based on its properties, along with +those of its children. ``FoldingSetNodeID`` then serves as a hash we can +use to compare expressions. We will need ``areSameExpr`` later. Before +you run the new code on the additional loops added to +test-files/simple.cpp, try to figure out which ones will be considered +potentially convertible. diff --git a/docs/LibFormat.rst b/docs/LibFormat.rst new file mode 100644 index 0000000..eacdc16 --- /dev/null +++ b/docs/LibFormat.rst @@ -0,0 +1,56 @@ +========= +LibFormat +========= + +LibFormat is a library that implements automatic source code formatting based +on Clang. This document describes the LibFormat interface and design as well +as some basic style discussions. + +If you just want to use `clang-format` as a tool or integrated into an editor, +check out :doc:`ClangFormat`. + +Design +------ + +FIXME: Write up design. + + +Interface +--------- + +The core routine of LibFormat is ``reformat()``: + +.. code-block:: c++ + + tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, + SourceManager &SourceMgr, + std::vector<CharSourceRange> Ranges); + +This reads a token stream out of the lexer ``Lex`` and reformats all the code +ranges in ``Ranges``. The ``FormatStyle`` controls basic decisions made during +formatting. A list of options can be found under :ref:`style-options`. + + +.. 
_style-options: + +Style Options +------------- + +The style options describe specific formatting options that can be used in +order to make `ClangFormat` comply with different style guides. Currently, +two style guides are hard-coded: + +.. code-block:: c++ + + /// \brief Returns a format style complying with the LLVM coding standards: + /// http://llvm.org/docs/CodingStandards.html. + FormatStyle getLLVMStyle(); + + /// \brief Returns a format style complying with Google's C++ style guide: + /// http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml. + FormatStyle getGoogleStyle(); + +These options are also exposed in the :doc:`standalone tools <ClangFormat>` +through the `-style` option. + +In the future, we plan on making this configurable. diff --git a/docs/LibTooling.html b/docs/LibTooling.html deleted file mode 100644 index 163d24a..0000000 --- a/docs/LibTooling.html +++ /dev/null @@ -1,212 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> -<title>LibTooling</title> -<link type="text/css" rel="stylesheet" href="../menu.css"> -<link type="text/css" rel="stylesheet" href="../content.css"> -</head> -<body> - -<!--#include virtual="../menu.html.incl"--> - -<div id="content"> - -<h1>LibTooling</h1> -<p>LibTooling is a library to support writing standalone tools based on -Clang. This document will provide a basic walkthrough of how to write -a tool using LibTooling.</p> -<p>For the information on how to setup Clang Tooling for LLVM see -<a href="HowToSetupToolingForLLVM.html">HowToSetupToolingForLLVM.html</a></p> - -<!-- ======================================================================= --> -<h2 id="intro">Introduction</h2> -<!-- ======================================================================= --> - -<p>Tools built with LibTooling, like Clang Plugins, run -<code>FrontendActions</code> over code. -<!-- See FIXME for a tutorial on how to write FrontendActions. 
--> -In this tutorial, we'll demonstrate the different ways of running clang's -<code>SyntaxOnlyAction</code>, which runs a quick syntax check, over a bunch of -code.</p> - -<!-- ======================================================================= --> -<h2 id="runoncode">Parsing a code snippet in memory.</h2> -<!-- ======================================================================= --> - -<p>If you ever wanted to run a <code>FrontendAction</code> over some sample -code, for example to unit test parts of the Clang AST, -<code>runToolOnCode</code> is what you looked for. Let me give you an example: -<pre> - #include "clang/Tooling/Tooling.h" - - TEST(runToolOnCode, CanSyntaxCheckCode) { - // runToolOnCode returns whether the action was correctly run over the - // given code. - EXPECT_TRUE(runToolOnCode(new clang::SyntaxOnlyAction, "class X {};")); - } -</pre> - -<!-- ======================================================================= --> -<h2 id="standalonetool">Writing a standalone tool.</h2> -<!-- ======================================================================= --> - -<p>Once you unit tested your <code>FrontendAction</code> to the point where it -cannot possibly break, it's time to create a standalone tool. For a standalone -tool to run clang, it first needs to figure out what command line arguments to -use for a specified file. To that end we create a -<code>CompilationDatabase</code>. There are different ways to create a -compilation database, and we need to support all of them depending on -command-line options. There's the <code>CommonOptionsParser</code> class -that takes the responsibility to parse command-line parameters related to -compilation databases and inputs, so that all tools share the implementation. -</p> - -<h3 id="parsingcommonoptions">Parsing common tools options.</h3> -<p><code>CompilationDatabase</code> can be read from a build directory or the -command line. 
Using <code>CommonOptionsParser</code> allows for explicit -specification of a compile command line, specification of build path using the -<code>-p</code> command-line option, and automatic location of the compilation -database using source files paths. -<pre> -#include "clang/Tooling/CommonOptionsParser.h" - -using namespace clang::tooling; - -int main(int argc, const char **argv) { - // CommonOptionsParser constructor will parse arguments and create a - // CompilationDatabase. In case of error it will terminate the program. - CommonOptionsParser OptionsParser(argc, argv); - - // Use OptionsParser.GetCompilations() and OptionsParser.GetSourcePathList() - // to retrieve CompilationDatabase and the list of input file paths. -} -</pre> -</p> - -<h3 id="tool">Creating and running a ClangTool.</h3> -<p>Once we have a <code>CompilationDatabase</code>, we can create a -<code>ClangTool</code> and run our <code>FrontendAction</code> over some code. -For example, to run the <code>SyntaxOnlyAction</code> over the files "a.cc" and -"b.cc" one would write: -<pre> - // A clang tool can run over a number of sources in the same process... - std::vector<std::string> Sources; - Sources.push_back("a.cc"); - Sources.push_back("b.cc"); - - // We hand the CompilationDatabase we created and the sources to run over into - // the tool constructor. - ClangTool Tool(OptionsParser.GetCompilations(), Sources); - - // The ClangTool needs a new FrontendAction for each translation unit we run - // on. Thus, it takes a FrontendActionFactory as parameter. To create a - // FrontendActionFactory from a given FrontendAction type, we call - // newFrontendActionFactory<clang::SyntaxOnlyAction>(). - int result = Tool.run(newFrontendActionFactory<clang::SyntaxOnlyAction>()); -</pre> -</p> - -<h3 id="main">Putting it together - the first tool.</h3> -<p>Now we combine the two previous steps into our first real tool. 
This example -tool is also checked into the clang tree at tools/clang-check/ClangCheck.cpp. -<pre> -// Declares clang::SyntaxOnlyAction. -#include "clang/Frontend/FrontendActions.h" -#include "clang/Tooling/CommonOptionsParser.h" -#include "clang/Tooling/Tooling.h" -// Declares llvm::cl::extrahelp. -#include "llvm/Support/CommandLine.h" - -using namespace clang::tooling; -using namespace llvm; - -// CommonOptionsParser declares HelpMessage with a description of the common -// command-line options related to the compilation database and input files. -// It's nice to have this help message in all tools. -static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage); - -// A help message for this specific tool can be added afterwards. -static cl::extrahelp MoreHelp("\nMore help text..."); - -int main(int argc, const char **argv) { - CommonOptionsParser OptionsParser(argc, argv); - ClangTool Tool(OptionsParser.GetCompilations(), - OptionsParser.GetSourcePathList()); - return Tool.run(newFrontendActionFactory<clang::SyntaxOnlyAction>()); -} -</pre> -</p> - -<h3 id="running">Running the tool on some code.</h3> -<p>When you check out and build clang, clang-check is already built and -available to you in bin/clang-check inside your build directory.</p> -<p>You can run clang-check on a file in the llvm repository by specifying -all the needed parameters after a "--" separator: -<pre> - $ cd /path/to/source/llvm - $ export BD=/path/to/build/llvm - $ $BD/bin/clang-check tools/clang/tools/clang-check/ClangCheck.cpp -- \ - clang++ -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS \ - -Itools/clang/include -I$BD/include -Iinclude -Itools/clang/lib/Headers -c -</pre> -</p> - -<p>As an alternative, you can also configure cmake to output a compile command -database into its build directory: -<pre> - # Alternatively to calling cmake, use ccmake, toggle to advanced mode and - # set the parameter CMAKE_EXPORT_COMPILE_COMMANDS from the UI. 
- $ cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON . -</pre> -</p> -<p> -This creates a file called compile_commands.json in the build directory. Now -you can run clang-check over files in the project by specifying the build path -as first argument and some source files as further positional arguments: -<pre> - $ cd /path/to/source/llvm - $ export BD=/path/to/build/llvm - $ $BD/bin/clang-check -p $BD tools/clang/tools/clang-check/ClangCheck.cpp -</pre> -</p> - -<h3 id="builtin">Builtin includes.</h3> -<p>Clang tools need their builtin headers and search for them the same way clang -does. Thus, the default location to look for builtin headers is in a path -$(dirname /path/to/tool)/../lib/clang/3.2/include relative to the tool -binary. This works out-of-the-box for tools running from llvm's toplevel -binary directory after building clang-headers, or if the tool is running -from the binary directory of a clang install next to the clang binary.</p> - -<p>Tips: if your tool fails to find stddef.h or similar headers, call -the tool with -v and look at the search paths it looks through.</p> - -<h3 id="linking">Linking.</h3> -<p>Please note that this presents the linking requirements at the time of this -writing. For the most up-to-date information, look at one of the tools' -Makefiles (for example -<a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/clang-check/Makefile?view=markup">clang-check/Makefile</a>). 
-</p> - -<p>To link a binary using the tooling infrastructure, link in the following -libraries: -<ul> -<li>Tooling</li> -<li>Frontend</li> -<li>Driver</li> -<li>Serialization</li> -<li>Parse</li> -<li>Sema</li> -<li>Analysis</li> -<li>Edit</li> -<li>AST</li> -<li>Lex</li> -<li>Basic</li> -</ul> -</p> - -</div> -</body> -</html> - diff --git a/docs/LibTooling.rst b/docs/LibTooling.rst new file mode 100644 index 0000000..a9c24c3 --- /dev/null +++ b/docs/LibTooling.rst @@ -0,0 +1,192 @@ +========== +LibTooling +========== + +LibTooling is a library to support writing standalone tools based on Clang. +This document will provide a basic walkthrough of how to write a tool using +LibTooling. + +For the information on how to setup Clang Tooling for LLVM see +:doc:`HowToSetupToolingForLLVM` + +Introduction +------------ + +Tools built with LibTooling, like Clang Plugins, run ``FrontendActions`` over +code. + +.. See FIXME for a tutorial on how to write FrontendActions. + +In this tutorial, we'll demonstrate the different ways of running Clang's +``SyntaxOnlyAction``, which runs a quick syntax check, over a bunch of code. + +Parsing a code snippet in memory +-------------------------------- + +If you ever wanted to run a ``FrontendAction`` over some sample code, for +example to unit test parts of the Clang AST, ``runToolOnCode`` is what you +looked for. Let me give you an example: + +.. code-block:: c++ + + #include "clang/Tooling/Tooling.h" + + TEST(runToolOnCode, CanSyntaxCheckCode) { + // runToolOnCode returns whether the action was correctly run over the + // given code. + EXPECT_TRUE(runToolOnCode(new clang::SyntaxOnlyAction, "class X {};")); + } + +Writing a standalone tool +------------------------- + +Once you unit tested your ``FrontendAction`` to the point where it cannot +possibly break, it's time to create a standalone tool. For a standalone tool +to run clang, it first needs to figure out what command line arguments to use +for a specified file. 
To that end we create a ``CompilationDatabase``. There +are different ways to create a compilation database, and we need to support all +of them depending on command-line options. There's the ``CommonOptionsParser`` +class that takes the responsibility to parse command-line parameters related to +compilation databases and inputs, so that all tools share the implementation. + +Parsing common tools options +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``CompilationDatabase`` can be read from a build directory or the command line. +Using ``CommonOptionsParser`` allows for explicit specification of a compile +command line, specification of build path using the ``-p`` command-line option, +and automatic location of the compilation database using source files paths. + +.. code-block:: c++ + + #include "clang/Tooling/CommonOptionsParser.h" + + using namespace clang::tooling; + + int main(int argc, const char **argv) { + // CommonOptionsParser constructor will parse arguments and create a + // CompilationDatabase. In case of error it will terminate the program. + CommonOptionsParser OptionsParser(argc, argv); + + // Use OptionsParser.getCompilations() and OptionsParser.getSourcePathList() + // to retrieve CompilationDatabase and the list of input file paths. + } + +Creating and running a ClangTool +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Once we have a ``CompilationDatabase``, we can create a ``ClangTool`` and run +our ``FrontendAction`` over some code. For example, to run the +``SyntaxOnlyAction`` over the files "a.cc" and "b.cc" one would write: + +.. code-block:: c++ + + // A clang tool can run over a number of sources in the same process... + std::vector<std::string> Sources; + Sources.push_back("a.cc"); + Sources.push_back("b.cc"); + + // We hand the CompilationDatabase we created and the sources to run over into + // the tool constructor. 
+ ClangTool Tool(OptionsParser.getCompilations(), Sources); + + // The ClangTool needs a new FrontendAction for each translation unit we run + // on. Thus, it takes a FrontendActionFactory as parameter. To create a + // FrontendActionFactory from a given FrontendAction type, we call + // newFrontendActionFactory<clang::SyntaxOnlyAction>(). + int result = Tool.run(newFrontendActionFactory<clang::SyntaxOnlyAction>()); + +Putting it together --- the first tool +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Now we combine the two previous steps into our first real tool. This example +tool is also checked into the clang tree at +``tools/clang-check/ClangCheck.cpp``. + +.. code-block:: c++ + + // Declares clang::SyntaxOnlyAction. + #include "clang/Frontend/FrontendActions.h" + #include "clang/Tooling/CommonOptionsParser.h" + #include "clang/Tooling/Tooling.h" + // Declares llvm::cl::extrahelp. + #include "llvm/Support/CommandLine.h" + + using namespace clang::tooling; + using namespace llvm; + + // CommonOptionsParser declares HelpMessage with a description of the common + // command-line options related to the compilation database and input files. + // It's nice to have this help message in all tools. + static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage); + + // A help message for this specific tool can be added afterwards. + static cl::extrahelp MoreHelp("\nMore help text..."); + + int main(int argc, const char **argv) { + CommonOptionsParser OptionsParser(argc, argv); + ClangTool Tool(OptionsParser.getCompilations(), + OptionsParser.getSourcePathList()); + return Tool.run(newFrontendActionFactory<clang::SyntaxOnlyAction>()); + } + +Running the tool on some code +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When you check out and build clang, clang-check is already built and available +to you in bin/clang-check inside your build directory. 
+ +You can run clang-check on a file in the llvm repository by specifying all the +needed parameters after a "``--``" separator: + +.. code-block:: bash + + $ cd /path/to/source/llvm + $ export BD=/path/to/build/llvm + $ $BD/bin/clang-check tools/clang/tools/clang-check/ClangCheck.cpp -- \ + clang++ -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS \ + -Itools/clang/include -I$BD/include -Iinclude \ + -Itools/clang/lib/Headers -c + +As an alternative, you can also configure cmake to output a compile command +database into its build directory: + +.. code-block:: bash + + # Alternatively to calling cmake, use ccmake, toggle to advanced mode and + # set the parameter CMAKE_EXPORT_COMPILE_COMMANDS from the UI. + $ cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON . + +This creates a file called ``compile_commands.json`` in the build directory. +Now you can run :program:`clang-check` over files in the project by specifying +the build path as first argument and some source files as further positional +arguments: + +.. code-block:: bash + + $ cd /path/to/source/llvm + $ export BD=/path/to/build/llvm + $ $BD/bin/clang-check -p $BD tools/clang/tools/clang-check/ClangCheck.cpp + + +.. _libtooling_builtin_includes: + +Builtin includes +^^^^^^^^^^^^^^^^ + +Clang tools need their builtin headers and search for them the same way Clang +does. Thus, the default location to look for builtin headers is in a path +``$(dirname /path/to/tool)/../lib/clang/3.3/include`` relative to the tool +binary. This works out-of-the-box for tools running from llvm's toplevel +binary directory after building clang-headers, or if the tool is running from +the binary directory of a clang install next to the clang binary. + +Tips: if your tool fails to find ``stddef.h`` or similar headers, call the tool +with ``-v`` and look at the search paths it looks through. 
+ +Linking +^^^^^^^ + +For a list of libraries to link, look at one of the tools' Makefiles (for +example `clang-check/Makefile +<http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/clang-check/Makefile?view=markup>`_). diff --git a/docs/Makefile.sphinx b/docs/Makefile.sphinx new file mode 100644 index 0000000..7949e39 --- /dev/null +++ b/docs/Makefile.sphinx @@ -0,0 +1,163 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . + +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext default + +default: html + +help: + @echo "Please use \`make <target>' where <target> is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all 
changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + -rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @# FIXME: Remove this `cp` once HTML->Sphinx transition is completed. + @# Kind of a hack, but HTML-formatted docs are on the way out anyway. + @echo "Copying legacy HTML-formatted docs into $(BUILDDIR)/html" + @cp -a *.html $(BUILDDIR)/html + @# FIXME: What we really need is a way to specify redirects, so that + @# we can just redirect to a reST'ified version of this document. + @# PR14714 is tracking the issue of redirects. + @cp -a Block-ABI-Apple.txt $(BUILDDIR)/html + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." 
+ +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Clang.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Clang.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/Clang" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Clang" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." 
+ +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." diff --git a/docs/MemorySanitizer.rst b/docs/MemorySanitizer.rst new file mode 100644 index 0000000..fdb8a81 --- /dev/null +++ b/docs/MemorySanitizer.rst @@ -0,0 +1,178 @@ +================ +MemorySanitizer +================ + +.. contents:: + :local: + +Introduction +============ + +MemorySanitizer is a detector of uninitialized reads. It consists of a +compiler instrumentation module and a run-time library. + +Typical slowdown introduced by MemorySanitizer is **3x**. + +How to build +============ + +Follow the `clang build instructions <../get_started.html>`_. CMake +build is supported. + +Usage +===== + +Simply compile and link your program with ``-fsanitize=memory`` flag. +The MemorySanitizer run-time library should be linked to the final +executable, so make sure to use ``clang`` (not ``ld``) for the final +link step. When linking shared libraries, the MemorySanitizer run-time +is not linked, so ``-Wl,-z,defs`` may cause link errors (don't use it +with MemorySanitizer). To get a reasonable performance add ``-O1`` or +higher. 
To get meaningful stack traces in error messages add +``-fno-omit-frame-pointer``. To get perfect stack traces you may need +to disable inlining (just use ``-O1``) and tail call elimination +(``-fno-optimize-sibling-calls``). + +.. code-block:: console + + % cat umr.cc + #include <stdio.h> + + int main(int argc, char** argv) { + int* a = new int[10]; + a[5] = 0; + if (a[argc]) + printf("xx\n"); + return 0; + } + + % clang -fsanitize=memory -fPIE -pie -fno-omit-frame-pointer -g -O2 umr.cc + +If a bug is detected, the program will print an error message to +stderr and exit with a non-zero exit code. Currently, MemorySanitizer +does not symbolize its output by default, so you may need to use a +separate script to symbolize the result offline (this will be fixed in +future). + +.. code-block:: console + + % ./a.out 2>log + % projects/compiler-rt/lib/asan/scripts/asan_symbolize.py / < log | c++filt + ==30106== WARNING: MemorySanitizer: UMR (uninitialized-memory-read) + #0 0x7f45944b418a in main umr.cc:6 + #1 0x7f45938b676c in __libc_start_main libc-start.c:226 + Exiting + +By default, MemorySanitizer exits on the first detected error. + +``__has_feature(memory_sanitizer)`` +------------------------------------ + +In some cases one may need to execute different code depending on +whether MemorySanitizer is enabled. :ref:`\_\_has\_feature +<langext-__has_feature-__has_extension>` can be used for this purpose. + +.. code-block:: c + + #if defined(__has_feature) + # if __has_feature(memory_sanitizer) + // code that builds only under MemorySanitizer + # endif + #endif + +``__attribute__((no_sanitize_memory))`` +----------------------------------------------- + +Some code should not be checked by MemorySanitizer. +One may use the function attribute +:ref:`no_sanitize_memory <langext-memory_sanitizer>` +to disable uninitialized checks in a particular function. +MemorySanitizer may still instrument such functions to avoid false positives. 
+This attribute may not be +supported by other compilers, so we suggest using it together with +``__has_feature(memory_sanitizer)``. Note: currently, this attribute will be +lost if the function is inlined. + +Origin Tracking +=============== + +MemorySanitizer can track origins of uninitialized values, similar to +Valgrind's --track-origins option. This feature is enabled by +``-fsanitize-memory-track-origins`` Clang option. With the code from +the example above, + +.. code-block:: console + + % clang -fsanitize=memory -fsanitize-memory-track-origins -fPIE -pie -fno-omit-frame-pointer -g -O2 umr.cc + % ./a.out 2>log + % projects/compiler-rt/lib/asan/scripts/asan_symbolize.py / < log | c++filt + ==14425== WARNING: MemorySanitizer: UMR (uninitialized-memory-read) + ==14425== WARNING: Trying to symbolize code, but external symbolizer is not initialized! + #0 0x7f8bdda3824b in main umr.cc:6 + #1 0x7f8bdce3a76c in __libc_start_main libc-start.c:226 + raw origin id: 2030043137 + ORIGIN: heap allocation: + #0 0x7f8bdda4034b in operator new[](unsigned long) msan_new_delete.cc:39 + #1 0x7f8bdda3814d in main umr.cc:4 + #2 0x7f8bdce3a76c in __libc_start_main libc-start.c:226 + Exiting + +Origin tracking has proved to be very useful for debugging UMR +reports. It slows down program execution by a factor of 1.5x-2x on top +of the usual MemorySanitizer slowdown. + +Handling external code +============================ + +MemorySanitizer requires that all program code is instrumented. This +also includes any libraries that the program depends on, even libc. +Failing to achieve this may result in false UMR reports. + +Full MemorySanitizer instrumentation is very difficult to achieve. To +make it easier, MemorySanitizer runtime library includes 70+ +interceptors for the most common libc functions. They make it possible +to run MemorySanitizer-instrumented programs linked with +uninstrumented libc. 
For example, the authors were able to bootstrap +MemorySanitizer-instrumented Clang compiler by linking it with +self-built instrumented libcxx (as a replacement for libstdc++). + +In the case when rebuilding all program dependencies with +MemorySanitizer is problematic, an experimental MSanDR tool can be +used. It is a DynamoRIO-based tool that uses dynamic instrumentation +to avoid false positives due to uninstrumented code. The tool simply +marks memory from uninstrumented libraries as fully initialized. See +`<http://code.google.com/p/memory-sanitizer/wiki/Running#Running_with_the_dynamic_tool>`_ +for more information. + +Supported Platforms +=================== + +MemorySanitizer is supported on + +* Linux x86\_64 (tested on Ubuntu 10.04 and 12.04); + +Limitations +=========== + +* MemorySanitizer uses 2x more real memory than a native run, 3x with + origin tracking. +* MemorySanitizer maps (but does not reserve) 64 Terabytes of virtual + address space. This means that tools like ``ulimit`` may not work as + usually expected. +* Static linking is not supported. +* Non-position-independent executables are not supported. +* Depending on the version of Linux kernel, running without ASLR may + not be supported. Note that GDB disables ASLR by default. To debug + instrumented programs, use "set disable-randomization off". + +Current Status +============== + +MemorySanitizer is an experimental tool. It is known to work on large +real-world programs, like Clang/LLVM itself. + +More Information +================ + +`http://code.google.com/p/memory-sanitizer <http://code.google.com/p/memory-sanitizer/>`_ + diff --git a/docs/Modules.rst b/docs/Modules.rst new file mode 100644 index 0000000..8a6b8b6 --- /dev/null +++ b/docs/Modules.rst @@ -0,0 +1,713 @@ +======= +Modules +======= + +.. contents:: + :local: + +.. warning:: + The functionality described on this page is still experimental! Please + try it out and send us bug reports! 
+ +Introduction +============ +Most software is built using a number of software libraries, including libraries supplied by the platform, internal libraries built as part of the software itself to provide structure, and third-party libraries. For each library, one needs to access both its interface (API) and its implementation. In the C family of languages, the interface to a library is accessed by including the appropriate header file(s): + +.. code-block:: c + + #include <SomeLib.h> + +The implementation is handled separately by linking against the appropriate library. For example, by passing ``-lSomeLib`` to the linker. + +Modules provide an alternative, simpler way to use software libraries that provides better compile-time scalability and eliminates many of the problems inherent to using the C preprocessor to access the API of a library. + +Problems with the current model +------------------------------- +The ``#include`` mechanism provided by the C preprocessor is a very poor way to access the API of a library, for a number of reasons: + +* **Compile-time scalability**: Each time a header is included, the + compiler must preprocess and parse the text in that header and every + header it includes, transitively. This process must be repeated for + every translation unit in the application, which involves a huge + amount of redundant work. In a project with *N* translation units + and *M* headers included in each translation unit, the compiler is + performing *M x N* work even though most of the *M* headers are + shared among multiple translation units. C++ is particularly bad, + because the compilation model for templates forces a huge amount of + code into headers. + +* **Fragility**: ``#include`` directives are treated as textual + inclusion by the preprocessor, and are therefore subject to any + active macro definitions at the time of inclusion. 
If any of the + active macro definitions happens to collide with a name in the + library, it can break the library API or cause compilation failures + in the library header itself. For an extreme example, + ``#define std "The C++ Standard"`` and then include a standard + library header: the result is a horrific cascade of failures in the + C++ Standard Library's implementation. More subtle real-world + problems occur when the headers for two different libraries interact + due to macro collisions, and users are forced to reorder + ``#include`` directives or introduce ``#undef`` directives to break + the (unintended) dependency. + +* **Conventional workarounds**: C programmers have + adopted a number of conventions to work around the fragility of the + C preprocessor model. Include guards, for example, are required for + the vast majority of headers to ensure that multiple inclusion + doesn't break the compile. Macro names are written with + ``LONG_PREFIXED_UPPERCASE_IDENTIFIERS`` to avoid collisions, and some + library/framework developers even use ``__underscored`` names + in headers to avoid collisions with "normal" names that (by + convention) shouldn't even be macros. These conventions are a + barrier to entry for developers coming from non-C languages, are + boilerplate for more experienced developers, and make our headers + far uglier than they should be. + +* **Tool confusion**: In a C-based language, it is hard to build tools + that work well with software libraries, because the boundaries of + the libraries are not clear. Which headers belong to a particular + library, and in what order should those headers be included to + guarantee that they compile correctly? Are the headers C, C++, + Objective-C++, or one of the variants of these languages? What + declarations in those headers are actually meant to be part of the + API, and what declarations are present only because they had to be + written as part of the header file? 
+ +Semantic import +--------------- +Modules improve access to the API of software libraries by replacing the textual preprocessor inclusion model with a more robust, more efficient semantic model. From the user's perspective, the code looks only slightly different, because one uses an ``import`` declaration rather than a ``#include`` preprocessor directive: + +.. code-block:: c + + import std.io; // pseudo-code; see below for syntax discussion + +However, this module import behaves quite differently from the corresponding ``#include <stdio.h>``: when the compiler sees the module import above, it loads a binary representation of the ``std.io`` module and makes its API available to the application directly. Preprocessor definitions that precede the import declaration have no impact on the API provided by ``std.io``, because the module itself was compiled as a separate, standalone module. Additionally, any linker flags required to use the ``std.io`` module will automatically be provided when the module is imported [#]_ +This semantic import model addresses many of the problems of the preprocessor inclusion model: + +* **Compile-time scalability**: The ``std.io`` module is only compiled once, and importing the module into a translation unit is a constant-time operation (independent of module system). Thus, the API of each software library is only parsed once, reducing the *M x N* compilation problem to an *M + N* problem. + +* **Fragility**: Each module is parsed as a standalone entity, so it has a consistent preprocessor environment. This completely eliminates the need for ``__underscored`` names and similarly defensive tricks. Moreover, the current preprocessor definitions when an import declaration is encountered are ignored, so one software library can not affect how another software library is compiled, eliminating include-order dependencies. 
+ +* **Tool confusion**: Modules describe the API of software libraries, and tools can reason about and present a module as a representation of that API. Because modules can only be built standalone, tools can rely on the module definition to ensure that they get the complete API for the library. Moreover, modules can specify which languages they work with, so, e.g., one can not accidentally attempt to load a C++ module into a C program. + +Problems modules do not solve +----------------------------- +Many programming languages have a module or package system, and because of the variety of features provided by these languages it is important to define what modules do *not* do. In particular, all of the following are considered out-of-scope for modules: + +* **Rewrite the world's code**: It is not realistic to require applications or software libraries to make drastic or non-backward-compatible changes, nor is it feasible to completely eliminate headers. Modules must interoperate with existing software libraries and allow a gradual transition. + +* **Versioning**: Modules have no notion of version information. Programmers must still rely on the existing versioning mechanisms of the underlying language (if any exist) to version software libraries. + +* **Namespaces**: Unlike in some languages, modules do not imply any notion of namespaces. Thus, a struct declared in one module will still conflict with a struct of the same name declared in a different module, just as they would if declared in two different headers. This aspect is important for backward compatibility, because (for example) the mangled names of entities in software libraries must not change when introducing modules. + +* **Binary distribution of modules**: Headers (particularly C++ headers) expose the full complexity of the language. Maintaining a stable binary module format across architectures, compiler versions, and compiler vendors is technically infeasible. 
+ +Using Modules +============= +To enable modules, pass the command-line flag ``-fmodules`` [#]_. This will make any modules-enabled software libraries available as modules as well as introducing any modules-specific syntax. Additional `command-line parameters`_ are described in a separate section later. + +Import declaration +------------------ +The most direct way to import a module is with an *import declaration*, which imports the named module: + +.. parsed-literal:: + + import std; + +The import declaration above imports the entire contents of the ``std`` module (which would contain, e.g., the entire C or C++ standard library) and makes its API available within the current translation unit. To import only part of a module, one may use dot syntax to specify a particular submodule, e.g., + +.. parsed-literal:: + + import std.io; + +Redundant import declarations are ignored, and one is free to import modules at any point within the translation unit, so long as the import declaration is at global scope. + +.. warning:: + The import declaration syntax described here does not actually exist. Rather, it is a straw man proposal that may very well change when modules are discussed in the C and C++ committees. See the section `Includes as imports`_ to see how modules get imported today. + +Includes as imports +------------------- +The primary user-level feature of modules is the import operation, which provides access to the API of software libraries. However, today's programs make extensive use of ``#include``, and it is unrealistic to assume that all of this code will change overnight. Instead, modules automatically translate ``#include`` directives into the corresponding module import. For example, the include directive + +.. code-block:: c + + #include <stdio.h> + +will be automatically mapped to an import of the module ``std.io``. 
Even with specific ``import`` syntax in the language, this particular feature is important for both adoption and backward compatibility: automatic translation of ``#include`` to ``import`` allows an application to get the benefits of modules (for all modules-enabled libraries) without any changes to the application itself. Thus, users can easily use modules with one compiler while falling back to the preprocessor-inclusion mechanism with other compilers. + +.. note:: + + The automatic mapping of ``#include`` to ``import`` also solves an implementation problem: importing a module with a definition of some entity (say, a ``struct Point``) and then parsing a header containing another definition of ``struct Point`` would cause a redefinition error, even if it is the same ``struct Point``. By mapping ``#include`` to ``import``, the compiler can guarantee that it always sees just the already-parsed definition from the module. + +Module maps +----------- +The crucial link between modules and headers is described by a *module map*, which describes how a collection of existing headers maps on to the (logical) structure of a module. For example, one could imagine a module ``std`` covering the C standard library. Each of the C standard library headers (``<stdio.h>``, ``<stdlib.h>``, ``<math.h>``, etc.) would contribute to the ``std`` module, by placing their respective APIs into the corresponding submodule (``std.io``, ``std.lib``, ``std.math``, etc.). Having a list of the headers that are part of the ``std`` module allows the compiler to build the ``std`` module as a standalone entity, and having the mapping from header names to (sub)modules allows the automatic translation of ``#include`` directives to module imports. + +Module maps are specified as separate files (each named ``module.map``) alongside the headers they describe, which allows them to be added to existing software libraries without having to change the library headers themselves (in most cases [#]_). 
The actual `Module map language`_ is described in a later section. + +.. note:: + + To actually see any benefits from modules, one first has to introduce module maps for the underlying C standard library and the libraries and headers on which it depends. The section `Modularizing a Platform`_ describes the steps one must take to write these module maps. + +Compilation model +----------------- +The binary representation of modules is automatically generated by the compiler on an as-needed basis. When a module is imported (e.g., by an ``#include`` of one of the module's headers), the compiler will spawn a second instance of itself [#]_, with a fresh preprocessing context [#]_, to parse just the headers in that module. The resulting Abstract Syntax Tree (AST) is then persisted into the binary representation of the module that is then loaded into the translation unit where the module import was encountered. + +The binary representation of modules is persisted in the *module cache*. Imports of a module will first query the module cache and, if a binary representation of the required module is already available, will load that representation directly. Thus, a module's headers will only be parsed once per language configuration, rather than once per translation unit that uses the module. + +Modules maintain references to each of the headers that were part of the module build. If any of those headers changes, or if any of the modules on which a module depends change, then the module will be (automatically) recompiled. The process should never require any user intervention. + +Command-line parameters +----------------------- +``-fmodules`` + Enable the modules feature (EXPERIMENTAL). + +``-fcxx-modules`` + Enable the modules feature for C++ (EXPERIMENTAL and VERY BROKEN). + +``-fmodules-cache-path=<directory>`` + Specify the path to the modules cache. If not provided, Clang will select a system-appropriate default. 
+ +``-f[no-]modules-autolink`` + Enable or disable automatic linking against the libraries associated with imported modules. + +``-fmodules-ignore-macro=macroname`` + Instruct modules to ignore the named macro when selecting an appropriate module variant. Use this for macros defined on the command line that don't affect how modules are built, to improve sharing of compiled module files. + +``-fmodules-prune-interval=seconds`` + Specify the minimum delay (in seconds) between attempts to prune the module cache. Module cache pruning attempts to clear out old, unused module files so that the module cache itself does not grow without bound. The default delay is large (604,800 seconds, or 7 days) because this is an expensive operation. Set this value to 0 to turn off pruning. + +``-fmodules-prune-after=seconds`` + Specify the minimum time (in seconds) for which a file in the module cache must be unused (according to access time) before module pruning will remove it. The default delay is large (2,678,400 seconds, or 31 days) to avoid excessive module rebuilding. + +``-module-file-info <module file name>`` + Debugging aid that prints information about a given module file (with a ``.pcm`` extension), including the language and preprocessor options that particular module variant was built with. + +Module Map Language +=================== + +The module map language describes the mapping from header files to the +logical structure of modules. To enable support for using a library as +a module, one must write a ``module.map`` file for that library. The +``module.map`` file is placed alongside the header files themselves, +and is written in the module map language described below. + +As an example, the module map file for the C standard library might look a bit like this: + +.. 
parsed-literal:: + + module std [system] { + module complex { + header "complex.h" + export * + } + + module ctype { + header "ctype.h" + export * + } + + module errno { + header "errno.h" + header "sys/errno.h" + export * + } + + module fenv { + header "fenv.h" + export * + } + + // ...more headers follow... + } + +Here, the top-level module ``std`` encompasses the whole C standard library. It has a number of submodules containing different parts of the standard library: ``complex`` for complex numbers, ``ctype`` for character types, etc. Each submodule lists one or more headers that provide the contents for that submodule. Finally, the ``export *`` command specifies that anything included by that submodule will be automatically re-exported. + +Lexical structure +----------------- +Module map files use a simplified form of the C99 lexer, with the same rules for identifiers, tokens, string literals, ``/* */`` and ``//`` comments. The module map language has the following reserved words; all other C identifiers are valid identifiers. + +.. parsed-literal:: + + ``config_macros`` ``export`` ``module`` + ``conflict`` ``framework`` ``requires`` + ``exclude`` ``header`` ``umbrella`` + ``explicit`` ``link`` + +Module map file +--------------- +A module map file consists of a series of module declarations: + +.. parsed-literal:: + + *module-map-file*: + *module-declaration** + +Within a module map file, modules are referred to by a *module-id*, which uses periods to separate each part of a module's name: + +.. parsed-literal:: + + *module-id*: + *identifier* ('.' *identifier*)* + +Module declaration +------------------ +A module declaration describes a module, including the headers that contribute to that module, its submodules, and other aspects of the module. + +.. 
parsed-literal:: + + *module-declaration*: + ``explicit``:sub:`opt` ``framework``:sub:`opt` ``module`` *module-id* *attributes*:sub:`opt` '{' *module-member** '}' + +The *module-id* should consist of only a single *identifier*, which provides the name of the module being defined. Each module shall have a single definition. + +The ``explicit`` qualifier can only be applied to a submodule, i.e., a module that is nested within another module. The contents of explicit submodules are only made available when the submodule itself was explicitly named in an import declaration or was re-exported from an imported module. + +The ``framework`` qualifier specifies that this module corresponds to a Darwin-style framework. A Darwin-style framework (used primarily on Mac OS X and iOS) is contained entirely in directory ``Name.framework``, where ``Name`` is the name of the framework (and, therefore, the name of the module). That directory has the following layout: + +.. parsed-literal:: + + Name.framework/ + module.map Module map for the framework + Headers/ Subdirectory containing framework headers + Frameworks/ Subdirectory containing embedded frameworks + Resources/ Subdirectory containing additional resources + Name Symbolic link to the shared library for the framework + +The ``system`` attribute specifies that the module is a system module. When a system module is rebuilt, all of the module's headers will be considered system headers, which suppresses warnings. This is equivalent to placing ``#pragma GCC system_header`` in each of the module's headers. The form of attributes is described in the section Attributes_, below. + +Modules can have a number of different kinds of members, each of which is described below: + +.. 
parsed-literal:: + + *module-member*: + *requires-declaration* + *header-declaration* + *umbrella-dir-declaration* + *submodule-declaration* + *export-declaration* + *link-declaration* + *config-macros-declaration* + *conflict-declaration* + +Requires declaration +~~~~~~~~~~~~~~~~~~~~ +A *requires-declaration* specifies the requirements that an importing translation unit must satisfy to use the module. + +.. parsed-literal:: + + *requires-declaration*: + ``requires`` *feature-list* + + *feature-list*: + *identifier* (',' *identifier*)* + +The requirements clause allows specific modules or submodules to specify that they are only accessible with certain language dialects or on certain platforms. The feature list is a set of identifiers, defined below. If any of the features is not available in a given translation unit, that translation unit shall not import the module. + +The following features are defined: + +altivec + The target supports AltiVec. + +blocks + The "blocks" language feature is available. + +cplusplus + C++ support is available. + +cplusplus11 + C++11 support is available. + +objc + Objective-C support is available. + +objc_arc + Objective-C Automatic Reference Counting (ARC) is available + +opencl + OpenCL is available + +tls + Thread local storage is available. + +*target feature* + A specific target feature (e.g., ``sse4``, ``avx``, ``neon``) is available. + + +**Example**: The ``std`` module can be extended to also include C++ and C++11 headers using a *requires-declaration*: + +.. parsed-literal:: + + module std { + // C standard library... + + module vector { + requires cplusplus + header "vector" + } + + module type_traits { + requires cplusplus11 + header "type_traits" + } + } + +Header declaration +~~~~~~~~~~~~~~~~~~ +A header declaration specifies that a particular header is associated with the enclosing module. + +.. 
parsed-literal:: + + *header-declaration*: + ``umbrella``:sub:`opt` ``header`` *string-literal* + ``exclude`` ``header`` *string-literal* + +A header declaration that does not contain ``exclude`` specifies a header that contributes to the enclosing module. Specifically, when the module is built, the named header will be parsed and its declarations will be (logically) placed into the enclosing submodule. + +A header with the ``umbrella`` specifier is called an umbrella header. An umbrella header includes all of the headers within its directory (and any subdirectories), and is typically used (in the ``#include`` world) to easily access the full API provided by a particular library. With modules, an umbrella header is a convenient shortcut that eliminates the need to write out ``header`` declarations for every library header. A given directory can only contain a single umbrella header. + +.. note:: + Any headers not included by the umbrella header should have + explicit ``header`` declarations. Use the + ``-Wincomplete-umbrella`` warning option to ask Clang to complain + about headers not covered by the umbrella header or the module map. + +A header with the ``exclude`` specifier is excluded from the module. It will not be included when the module is built, nor will it be considered to be part of the module. + +**Example**: The C header ``assert.h`` is an excellent candidate for an excluded header, because it is meant to be included multiple times (possibly with different ``NDEBUG`` settings). + +.. parsed-literal:: + + module std [system] { + exclude header "assert.h" + } + +A given header shall not be referenced by more than one *header-declaration*. + +Umbrella directory declaration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +An umbrella directory declaration specifies that all of the headers in the specified directory should be included within the module. + +.. 
parsed-literal:: + + *umbrella-dir-declaration*: + ``umbrella`` *string-literal* + +The *string-literal* refers to a directory. When the module is built, all of the header files in that directory (and its subdirectories) are included in the module. + +An *umbrella-dir-declaration* shall not refer to the same directory as the location of an umbrella *header-declaration*. In other words, only a single kind of umbrella can be specified for a given directory. + +.. note:: + + Umbrella directories are useful for libraries that have a large number of headers but do not have an umbrella header. + + +Submodule declaration +~~~~~~~~~~~~~~~~~~~~~ +Submodule declarations describe modules that are nested within their enclosing module. + +.. parsed-literal:: + + *submodule-declaration*: + *module-declaration* + *inferred-submodule-declaration* + +A *submodule-declaration* that is a *module-declaration* is a nested module. If the *module-declaration* has a ``framework`` specifier, the enclosing module shall have a ``framework`` specifier; the submodule's contents shall be contained within the subdirectory ``Frameworks/SubName.framework``, where ``SubName`` is the name of the submodule. + +A *submodule-declaration* that is an *inferred-submodule-declaration* describes a set of submodules that correspond to any headers that are part of the module but are not explicitly described by a *header-declaration*. + +.. parsed-literal:: + + *inferred-submodule-declaration*: + ``explicit``:sub:`opt` ``framework``:sub:`opt` ``module`` '*' *attributes*:sub:`opt` '{' *inferred-submodule-member** '}' + + *inferred-submodule-member*: + ``export`` '*' + +A module containing an *inferred-submodule-declaration* shall have either an umbrella header or an umbrella directory. 
The headers to which the *inferred-submodule-declaration* applies are exactly those headers included by the umbrella header (transitively) or included in the module because they reside within the umbrella directory (or its subdirectories). + +For each header included by the umbrella header or in the umbrella directory that is not named by a *header-declaration*, a module declaration is implicitly generated from the *inferred-submodule-declaration*. The module will: + +* Have the same name as the header (without the file extension) +* Have the ``explicit`` specifier, if the *inferred-submodule-declaration* has the ``explicit`` specifier +* Have the ``framework`` specifier, if the + *inferred-submodule-declaration* has the ``framework`` specifier +* Have the attributes specified by the \ *inferred-submodule-declaration* +* Contain a single *header-declaration* naming that header +* Contain a single *export-declaration* ``export *``, if the \ *inferred-submodule-declaration* contains the \ *inferred-submodule-member* ``export *`` + +**Example**: If the subdirectory "MyLib" contains the headers ``A.h`` and ``B.h``, then the following module map: + +.. parsed-literal:: + + module MyLib { + umbrella "MyLib" + explicit module * { + export * + } + } + +is equivalent to the (more verbose) module map: + +.. parsed-literal:: + + module MyLib { + explicit module A { + header "A.h" + export * + } + + explicit module B { + header "B.h" + export * + } + } + +Export declaration +~~~~~~~~~~~~~~~~~~ +An *export-declaration* specifies which imported modules will automatically be re-exported as part of a given module's API. + +.. parsed-literal:: + + *export-declaration*: + ``export`` *wildcard-module-id* + + *wildcard-module-id*: + *identifier* + '*' + *identifier* '.' *wildcard-module-id* + +The *export-declaration* names a module or a set of modules that will be re-exported to any translation unit that imports the enclosing module. 
Each imported module that matches the *wildcard-module-id* up to, but not including, the first ``*`` will be re-exported. + +**Example**: In the following example, importing ``MyLib.Derived`` also provides the API for ``MyLib.Base``: + +.. parsed-literal:: + + module MyLib { + module Base { + header "Base.h" + } + + module Derived { + header "Derived.h" + export Base + } + } + +Note that, if ``Derived.h`` includes ``Base.h``, one can simply use a wildcard export to re-export everything ``Derived.h`` includes: + +.. parsed-literal:: + + module MyLib { + module Base { + header "Base.h" + } + + module Derived { + header "Derived.h" + export * + } + } + +.. note:: + + The wildcard export syntax ``export *`` re-exports all of the + modules that were imported in the actual header file. Because + ``#include`` directives are automatically mapped to module imports, + ``export *`` provides the same transitive-inclusion behavior + provided by the C preprocessor, e.g., importing a given module + implicitly imports all of the modules on which it depends. + Therefore, liberal use of ``export *`` provides excellent backward + compatibility for programs that rely on transitive inclusion (i.e., + all of them). + +Link declaration +~~~~~~~~~~~~~~~~ +A *link-declaration* specifies a library or framework against which a program should be linked if the enclosing module is imported in any translation unit in that program. + +.. parsed-literal:: + + *link-declaration*: + ``link`` ``framework``:sub:`opt` *string-literal* + +The *string-literal* specifies the name of the library or framework against which the program should be linked. For example, specifying "clangBasic" would instruct the linker to link with ``-lclangBasic`` for a Unix-style linker. + +A *link-declaration* with the ``framework`` specifies that the linker should link against the named framework, e.g., with ``-framework MyFramework``. + +.. 
note:: + + Automatic linking with the ``link`` directive is not yet widely + implemented, because it requires support from both the object file + format and the linker. The notion is similar to Microsoft Visual + Studio's ``#pragma comment(lib...)``. + +Configuration macros declaration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The *config-macros-declaration* specifies the set of configuration macros that have an effect on the API of the enclosing module. + +.. parsed-literal:: + + *config-macros-declaration*: + ``config_macros`` *attributes*:sub:`opt` *config-macro-list*:sub:`opt` + + *config-macro-list*: + *identifier* (',' *identifier*)* + +Each *identifier* in the *config-macro-list* specifies the name of a macro. The compiler is required to maintain different variants of the given module for differing definitions of any of the named macros. + +A *config-macros-declaration* shall only be present on a top-level module, i.e., a module that is not nested within an enclosing module. + +The ``exhaustive`` attribute specifies that the list of macros in the *config-macros-declaration* is exhaustive, meaning that no other macro definition is intended to have an effect on the API of that module. + +.. note:: + + The ``exhaustive`` attribute implies that any macro definitions + for macros not listed as configuration macros should be ignored + completely when building the module. As an optimization, the + compiler could reduce the number of unique module variants by not + considering these non-configuration macros. This optimization is not + yet implemented in Clang. + +A translation unit shall not import the same module under different definitions of the configuration macros. + +.. note:: + + Clang implements a weak form of this requirement: the definitions + used for configuration macros are fixed based on the definitions + provided by the command line. 
If an import occurs and the definition + of any configuration macro has changed, the compiler will produce a + warning (under the control of ``-Wconfig-macros``). + +**Example:** A logging library might provide different API (e.g., in the form of different definitions for a logging macro) based on the ``NDEBUG`` macro setting: + +.. parsed-literal:: + + module MyLogger { + umbrella header "MyLogger.h" + config_macros [exhaustive] NDEBUG + } + +Conflict declarations +~~~~~~~~~~~~~~~~~~~~~ +A *conflict-declaration* describes a case where the presence of two different modules in the same translation unit is likely to cause a problem. For example, two modules may provide similar-but-incompatible functionality. + +.. parsed-literal:: + + *conflict-declaration*: + ``conflict`` *module-id* ',' *string-literal* + +The *module-id* of the *conflict-declaration* specifies the module with which the enclosing module conflicts. The specified module shall not have been imported in the translation unit when the enclosing module is imported. + +The *string-literal* provides a message to be provided as part of the compiler diagnostic when two modules conflict. + +.. note:: + + Clang emits a warning (under the control of ``-Wmodule-conflict``) + when a module conflict is discovered. + +**Example:** + +.. parsed-literal:: + + module Conflicts { + explicit module A { + header "conflict_a.h" + conflict B, "we just don't like B" + } + + module B { + header "conflict_b.h" + } + } + + +Attributes +---------- +Attributes are used in a number of places in the grammar to describe specific behavior of other declarations. The format of attributes is fairly simple. + +.. parsed-literal:: + + *attributes*: + *attribute* *attributes*:sub:`opt` + + *attribute*: + '[' *identifier* ']' + +Any *identifier* can be used as an attribute, and each declaration specifies what attributes can be applied to it. 
+ +Modularizing a Platform +======================= +To get any benefit out of modules, one needs to introduce module maps for software libraries starting at the bottom of the stack. This typically means introducing a module map covering the operating system's headers and the C standard library headers (in ``/usr/include``, for a Unix system). + +The module maps will be written using the `module map language`_, which provides the tools necessary to describe the mapping between headers and modules. Because the set of headers differs from one system to the next, the module map will likely have to be somewhat customized for, e.g., a particular distribution and version of the operating system. Moreover, the system headers themselves may require some modification, if they exhibit any anti-patterns that break modules. Such common patterns are described below. + +**Macro-guarded copy-and-pasted definitions** + System headers vend core types such as ``size_t`` for users. These types are often needed in a number of system headers, and are almost trivial to write. Hence, it is fairly common to see a definition such as the following copy-and-pasted throughout the headers: + + .. parsed-literal:: + + #ifndef _SIZE_T + #define _SIZE_T + typedef __SIZE_TYPE__ size_t; + #endif + + Unfortunately, when modules compiles all of the C library headers together into a single module, only the first actual type definition of ``size_t`` will be visible, and then only in the submodule corresponding to the lucky first header. Any other headers that have copy-and-pasted versions of this pattern will *not* have a definition of ``size_t``. Importing the submodule corresponding to one of those headers will therefore not yield ``size_t`` as part of the API, because it wasn't there when the header was parsed. 
The fix for this problem is either to pull the copied declarations into a common header that gets included everywhere ``size_t`` is part of the API, or to eliminate the ``#ifndef`` and redefine the ``size_t`` type. The latter works for C++ headers and C11, but will cause an error for non-modules C90/C99, where redefinition of ``typedefs`` is not permitted. + +**Conflicting definitions** + Different system headers may provide conflicting definitions for various macros, functions, or types. These conflicting definitions don't tend to cause problems in a pre-modules world unless someone happens to include both headers in one translation unit. Since the fix is often simply "don't do that", such problems persist. Modules requires that the conflicting definitions be eliminated or that they be placed in separate modules (the former is generally the better answer). + +**Missing includes** + Headers are often missing ``#include`` directives for headers that they actually depend on. As with the problem of conflicting definitions, this only affects unlucky users who don't happen to include headers in the right order. With modules, the headers of a particular module will be parsed in isolation, so the module may fail to build if there are missing includes. + +**Headers that vend multiple APIs at different times** + Some systems have headers that contain a number of different kinds of API definitions, only some of which are made available with a given include. For example, the header may vend ``size_t`` only when the macro ``__need_size_t`` is defined before that header is included, and also vend ``wchar_t`` only when the macro ``__need_wchar_t`` is defined. Such headers are often included many times in a single translation unit, and will have no include guards. There is no sane way to map this header to a submodule. One can either eliminate the header (e.g., by splitting it into separate headers, one per actual API) or simply ``exclude`` it in the module map. 
+ +To detect and help address some of these problems, the ``clang-tools-extra`` repository contains a ``modularize`` tool that parses a set of given headers and attempts to detect these problems and produce a report. See the tool's in-source documentation for information on how to check your system or library headers. + +Future Directions +================= +Modules is an experimental feature, and there is much work left to do to make it both real and useful. Here are a few ideas: + +**Detect unused module imports** + Unlike with ``#include`` directives, it should be fairly simple to track whether a directly-imported module has ever been used. By doing so, Clang can emit ``unused import`` or ``unused #include`` diagnostics, including Fix-Its to remove the useless imports/includes. + +**Fix-Its for missing imports** + It's fairly common for one to make use of some API while writing code, only to get a compiler error about "unknown type" or "no function named" because the corresponding header has not been included. Clang should detect such cases and auto-import the required module (with a Fix-It!). + +**Improve modularize** + The modularize tool is both extremely important (for deployment) and extremely crude. It needs better UI, better detection of problems (especially for C++), and perhaps an assistant mode to help write module maps for you. + +**C++ Support** + Modules clearly has to work for C++, or we'll never get to use it for the Clang code base. + +Where To Learn More About Modules +================================= +The Clang source code provides additional information about modules: + +``clang/lib/Headers/module.map`` + Module map for Clang's compiler-specific header files. + +``clang/test/Modules/`` + Tests specifically related to modules functionality. + +``clang/include/clang/Basic/Module.h`` + The ``Module`` class in this header describes a module, and is used throughout the compiler to implement modules. 
+ +``clang/include/clang/Lex/ModuleMap.h`` + The ``ModuleMap`` class in this header describes the full module map, consisting of all of the module map files that have been parsed, and providing facilities for looking up module maps and mapping between modules and headers (in both directions). + +PCHInternals_ + Information about the serialized AST format used for precompiled headers and modules. The actual implementation is in the ``clangSerialization`` library. + +.. [#] Automatic linking against the libraries of modules requires specific linker support, which is not widely available. + +.. [#] Modules are only available in C and Objective-C; a separate flag ``-fcxx-modules`` enables modules support for C++, which is even more experimental and broken. + +.. [#] There are certain anti-patterns that occur in headers, particularly system headers, that cause problems for modules. The section `Modularizing a Platform`_ describes some of them. + +.. [#] The second instance is actually a new thread within the current process, not a separate process. However, the original compiler instance is blocked on the execution of this thread. + +.. [#] The preprocessing context in which the modules are parsed is actually dependent on the command-line options provided to the compiler, including the language dialect and any ``-D`` options. However, the compiled modules for different command-line options are kept distinct, and any preprocessor directives that occur within the translation unit are ignored. See the section on the `Configuration macros declaration`_ for more information. + +.. 
_PCHInternals: PCHInternals.html + diff --git a/docs/ObjectiveCLiterals.html b/docs/ObjectiveCLiterals.html deleted file mode 100644 index d5a8a9e..0000000 --- a/docs/ObjectiveCLiterals.html +++ /dev/null @@ -1,423 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<!-- Material used from: HTML 4.01 specs: http://www.w3.org/TR/html401/ --> -<html> -<head> - <META http-equiv="Content-Type" content="text/html; charset=UTF8"> - <title>Objective-C Literals</title> - <link type="text/css" rel="stylesheet" href="../menu.css"> - <link type="text/css" rel="stylesheet" href="../content.css"> - <style type="text/css"> - td { - vertical-align: top; - } - th { background-color: #ffddaa; } - </style> -</head> -<body> - -<!--#include virtual="../menu.html.incl"--> - -<div id="content"> - -<h1>Objective-C Literals</h1> - -<h2>Introduction</h2> - -<p>Three new features were introduced into clang at the same time: <i>NSNumber Literals</i> provide a syntax for creating <code>NSNumber</code> from scalar literal expressions; <i>Collection Literals</i> provide a short-hand for creating arrays and dictionaries; <i>Object Subscripting</i> provides a way to use subscripting with Objective-C objects. Users of Apple compiler releases can use these features starting with the Apple LLVM Compiler 4.0. 
Users of open-source LLVM.org compiler releases can use these features starting with clang v3.1.</p> - -<p>These language additions simplify common Objective-C programming patterns, make programs more concise, and improve the safety of container creation.</p> - -<p>This document describes how the features are implemented in clang, and how to use them in your own programs.</p> - -<h2>NSNumber Literals</h2> - -<p>The framework class <code>NSNumber</code> is used to wrap scalar values inside objects: signed and unsigned integers (<code>char</code>, <code>short</code>, <code>int</code>, <code>long</code>, <code>long long</code>), floating point numbers (<code>float</code>, <code>double</code>), and boolean values (<code>BOOL</code>, C++ <code>bool</code>). Scalar values wrapped in objects are also known as <i>boxed</i> values.</p> - -<p>In Objective-C, any character, numeric or boolean literal prefixed with the <code>'@'</code> character will evaluate to a pointer to an <code>NSNumber</code> object initialized with that value. C's type suffixes may be used to control the size of numeric literals.</p> - -<h3>Examples</h3> - -<p>The following program illustrates the rules for <code>NSNumber</code> literals:</p> - -<pre> -void main(int argc, const char *argv[]) { - // character literals. - NSNumber *theLetterZ = @'Z'; // equivalent to [NSNumber numberWithChar:'Z'] - - // integral literals. - NSNumber *fortyTwo = @42; // equivalent to [NSNumber numberWithInt:42] - NSNumber *fortyTwoUnsigned = @42U; // equivalent to [NSNumber numberWithUnsignedInt:42U] - NSNumber *fortyTwoLong = @42L; // equivalent to [NSNumber numberWithLong:42L] - NSNumber *fortyTwoLongLong = @42LL; // equivalent to [NSNumber numberWithLongLong:42LL] - - // floating point literals. - NSNumber *piFloat = @3.141592654F; // equivalent to [NSNumber numberWithFloat:3.141592654F] - NSNumber *piDouble = @3.1415926535; // equivalent to [NSNumber numberWithDouble:3.1415926535] - - // BOOL literals. 
- NSNumber *yesNumber = @YES; // equivalent to [NSNumber numberWithBool:YES] - NSNumber *noNumber = @NO; // equivalent to [NSNumber numberWithBool:NO] - -#ifdef __cplusplus - NSNumber *trueNumber = @true; // equivalent to [NSNumber numberWithBool:(BOOL)true] - NSNumber *falseNumber = @false; // equivalent to [NSNumber numberWithBool:(BOOL)false] -#endif -} -</pre> - -<h3>Discussion</h3> - -<p>NSNumber literals only support literal scalar values after the <code>'@'</code>. Consequently, <code>@INT_MAX</code> works, but <code>@INT_MIN</code> does not, because they are defined like this:</p> - -<pre> -#define INT_MAX 2147483647 /* max value for an int */ -#define INT_MIN (-2147483647-1) /* min value for an int */ -</pre> - -<p>The definition of <code>INT_MIN</code> is not a simple literal, but a parenthesized expression. Parenthesized -expressions are supported using the <a href="#objc_boxed_expressions">boxed expression</a> syntax, which is described in the next section.</p> - -<p>Because <code>NSNumber</code> does not currently support wrapping <code>long double</code> values, the use of a <code>long double NSNumber</code> literal (e.g. <code>@123.23L</code>) will be rejected by the compiler.</p> - -<p>Previously, the <code>BOOL</code> type was simply a typedef for <code>signed char</code>, and <code>YES</code> and <code>NO</code> were macros that expand to <code>(BOOL)1</code> and <code>(BOOL)0</code> respectively. To support <code>@YES</code> and <code>@NO</code> expressions, these macros are now defined using new language keywords in <code><objc/objc.h></code>:</p> - -<pre> -#if __has_feature(objc_bool) -#define YES __objc_yes -#define NO __objc_no -#else -#define YES ((BOOL)1) -#define NO ((BOOL)0) -#endif -</pre> - -<p>The compiler implicitly converts <code>__objc_yes</code> and <code>__objc_no</code> to <code>(BOOL)1</code> and <code>(BOOL)0</code>. 
The keywords are used to disambiguate <code>BOOL</code> and integer literals.</p> - -<p>Objective-C++ also supports <code>@true</code> and <code>@false</code> expressions, which are equivalent to <code>@YES</code> and <code>@NO</code>.</p> - -<!-- ======================================================================= --> -<h2 id="objc_boxed_expressions">Boxed Expressions</h2> -<!-- ======================================================================= --> - -<p>Objective-C provides a new syntax for boxing C expressions:</p> - -<pre> -<code>@( <em>expression</em> )</code> -</pre> - -<p>Expressions of scalar (numeric, enumerated, BOOL) and C string pointer types -are supported:</p> - -<pre> -// numbers. -NSNumber *smallestInt = @(-INT_MAX - 1); // [NSNumber numberWithInt:(-INT_MAX - 1)] -NSNumber *piOverTwo = @(M_PI / 2); // [NSNumber numberWithDouble:(M_PI / 2)] - -// enumerated types. -typedef enum { Red, Green, Blue } Color; -NSNumber *favoriteColor = @(Green); // [NSNumber numberWithInt:((int)Green)] - -// strings. -NSString *path = @(getenv("PATH")); // [NSString stringWithUTF8String:(getenv("PATH"))] -NSArray *pathComponents = [path componentsSeparatedByString:@":"]; -</pre> - -<h3>Boxed Enums</h3> - -<p> -Cocoa frameworks frequently define constant values using <em>enums.</em> Although enum values are integral, they may not be used directly as boxed literals (this avoids conflicts with future <code>'@'</code>-prefixed Objective-C keywords). Instead, an enum value must be placed inside a boxed expression. 
The following example demonstrates configuring an <code>AVAudioRecorder</code> using a dictionary that contains a boxed enumeration value: -</p> - -<pre> -enum { - AVAudioQualityMin = 0, - AVAudioQualityLow = 0x20, - AVAudioQualityMedium = 0x40, - AVAudioQualityHigh = 0x60, - AVAudioQualityMax = 0x7F -}; - -- (AVAudioRecorder *)recordToFile:(NSURL *)fileURL { - NSDictionary *settings = @{ AVEncoderAudioQualityKey : @(AVAudioQualityMax) }; - return [[AVAudioRecorder alloc] initWithURL:fileURL settings:settings error:NULL]; -} -</pre> - -<p> -The expression <code>@(AVAudioQualityMax)</code> converts <code>AVAudioQualityMax</code> to an integer type, and boxes the value accordingly. If the enum has a <a href="http://clang.llvm.org/docs/LanguageExtensions.html#objc_fixed_enum">fixed underlying type</a> as in: -</p> - -<pre> -typedef enum : unsigned char { Red, Green, Blue } Color; -NSNumber *red = @(Red), *green = @(Green), *blue = @(Blue); // => [NSNumber numberWithUnsignedChar:] -</pre> - -<p> -then the fixed underlying type will be used to select the correct <code>NSNumber</code> creation method. -</p> - -<p> -Boxing a value of enum type will result in a <code>NSNumber</code> pointer with a creation method according to the underlying type of the enum, -which can be a <a href="http://clang.llvm.org/docs/LanguageExtensions.html#objc_fixed_enum">fixed underlying type</a> or a compiler-defined -integer type capable of representing the values of all the members of the enumeration: -</p> - -<pre> -typedef enum : unsigned char { Red, Green, Blue } Color; -Color col = Red; -NSNumber *nsCol = @(col); // => [NSNumber numberWithUnsignedChar:] -</pre> - -<h3>Boxed C Strings</h3> - -<p> -A C string literal prefixed by the <code>'@'</code> token denotes an <code>NSString</code> literal in the same way a numeric literal prefixed by the <code>'@'</code> token denotes an <code>NSNumber</code> literal. 
When the type of the parenthesized expression is <code>(char *)</code> or <code>(const char *)</code>, the result of the boxed expression is a pointer to an <code>NSString</code> object containing equivalent character data, which is assumed to be '\0'-terminated and UTF-8 encoded. The following example converts C-style command line arguments into <code>NSString</code> objects. -</p> - -<pre> -// Partition command line arguments into positional and option arguments. -NSMutableArray *args = [NSMutableArray new]; -NSMutableDictionary *options = [NSMutableDictionary new]; -while (--argc) { - const char *arg = *++argv; - if (strncmp(arg, "--", 2) == 0) { - options[@(arg + 2)] = @(*++argv); // --key value - } else { - [args addObject:@(arg)]; // positional argument - } -} -</pre> - -<p> -As with all C pointers, character pointer expressions can involve arbitrary pointer arithmetic, therefore programmers must ensure that the character data is valid. Passing <code>NULL</code> as the character pointer will raise an exception at runtime. When possible, the compiler will reject <code>NULL</code> character pointers used in boxed expressions. -</p> - -<h3>Availability</h3> - -<p>Boxed expressions will be available in clang 3.2. It is not currently available in any Apple compiler.</p> - -<h2>Container Literals</h2> - -<p>Objective-C now supports a new expression syntax for creating immutable array and dictionary container objects.</p> - -<h3>Examples</h3> - -<p>Immutable array expression:</p> - -<pre> -NSArray *array = @[ @"Hello", NSApp, [NSNumber numberWithInt:42] ]; -</pre> - -<p>This creates an <code>NSArray</code> with 3 elements. 
The comma-separated sub-expressions of an array literal can be any Objective-C object pointer typed expression.</p> - -<p>Immutable dictionary expression:</p> - -<pre> -NSDictionary *dictionary = @{ - @"name" : NSUserName(), - @"date" : [NSDate date], - @"processInfo" : [NSProcessInfo processInfo] -}; -</pre> - -<p>This creates an <code>NSDictionary</code> with 3 key/value pairs. Value sub-expressions of a dictionary literal must be Objective-C object pointer typed, as in array literals. Key sub-expressions must be of an Objective-C object pointer type that implements the <code><NSCopying></code> protocol.</p> - -<h3>Discussion</h3> - -<p>Neither keys nor values can have the value <code>nil</code> in containers. If the compiler can prove that a key or value is <code>nil</code> at compile time, then a warning will be emitted. Otherwise, a runtime error will occur.</p> - -<p>Using array and dictionary literals is safer than the variadic creation forms commonly in use today. Array literal expressions expand to calls to <code>+[NSArray arrayWithObjects:count:]</code>, which validates that all objects are non-<code>nil</code>. The variadic form, <code>+[NSArray arrayWithObjects:]</code> uses <code>nil</code> as an argument list terminator, which can lead to malformed array objects. 
Dictionary literals are similarly created with <code>+[NSDictionary dictionaryWithObjects:forKeys:count:]</code> which validates all objects and keys, unlike <code>+[NSDictionary dictionaryWithObjectsAndKeys:]</code> which also uses a <code>nil</code> parameter as an argument list terminator.</p> - -<h2>Object Subscripting</h2> - -<p>Objective-C object pointer values can now be used with C's subscripting operator.</p> - -<h3>Examples</h3> - -<p>The following code demonstrates the use of object subscripting syntax with <code>NSMutableArray</code> and <code>NSMutableDictionary</code> objects:</p> - -<pre> -NSMutableArray *array = ...; -NSUInteger idx = ...; -id newObject = ...; -id oldObject = array[idx]; -array[idx] = newObject; // replace oldObject with newObject - -NSMutableDictionary *dictionary = ...; -NSString *key = ...; -oldObject = dictionary[key]; -dictionary[key] = newObject; // replace oldObject with newObject -</pre> - -<p>The next section explains how subscripting expressions map to accessor methods.</p> - -<h3>Subscripting Methods</h3> - -<p>Objective-C supports two kinds of subscript expressions: <i>array-style</i> subscript expressions use integer typed subscripts; <i>dictionary-style</i> subscript expressions use Objective-C object pointer typed subscripts. Each type of subscript expression is mapped to a message send using a predefined selector. The advantage of this design is flexibility: class designers are free to introduce subscripting by declaring methods or by adopting protocols. Moreover, because the method names are selected by the type of the subscript, an object can be subscripted using both array and dictionary styles.</p> - -<h4>Array-Style Subscripting</h4> - -<p>When the subscript operand has an integral type, the expression is rewritten to use one of two different selectors, depending on whether the element is being read or written. 
When an expression reads an element using an integral index, as in the following example:</p> - -<pre> -NSUInteger idx = ...; -id value = object[idx]; -</pre> - -<p>it is translated into a call to <code>objectAtIndexedSubscript:</code></p> - -<pre> -id value = [object objectAtIndexedSubscript:idx]; -</pre> - -<p>When an expression writes an element using an integral index:</p> - -<pre> -object[idx] = newValue; -</pre> - -<p>it is translated to a call to <code>setObject:atIndexedSubscript:</code></p> - -<pre> -[object setObject:newValue atIndexedSubscript:idx]; -</pre> - -<p>These message sends are then type-checked and performed just like explicit message sends. The method used for objectAtIndexedSubscript: must be declared with an argument of integral type and a return value of some Objective-C object pointer type. The method used for setObject:atIndexedSubscript: must be declared with its first argument having some Objective-C pointer type and its second argument having integral type.</p> - -<p>The meaning of indexes is left up to the declaring class. The compiler will coerce the index to the appropriate argument type of the method it uses for type-checking. For an instance of <code>NSArray</code>, reading an element using an index outside the range <code>[0, array.count)</code> will raise an exception. For an instance of <code>NSMutableArray</code>, assigning to an element using an index within this range will replace that element, but assigning to an element using an index outside this range will raise an exception; no syntax is provided for inserting, appending, or removing elements for mutable arrays.</p> - -<p>A class need not declare both methods in order to take advantage of this language feature. 
For example, the class <code>NSArray</code> declares only <code>objectAtIndexedSubscript:</code>, so that assignments to elements will fail to type-check; moreover, its subclass <code>NSMutableArray</code> declares <code>setObject:atIndexedSubscript:</code>.</p> - -<h4>Dictionary-Style Subscripting</h4> - -<p>When the subscript operand has an Objective-C object pointer type, the expression is rewritten to use one of two different selectors, depending on whether the element is being read from or written to. When an expression reads an element using an Objective-C object pointer subscript operand, as in the following example:</p> - -<pre> -id key = ...; -id value = object[key]; -</pre> - -<p>it is translated into a call to the <code>objectForKeyedSubscript:</code> method:</p> - -<pre> -id value = [object objectForKeyedSubscript:key]; -</pre> - -<p>When an expression writes an element using an Objective-C object pointer subscript:</p> - -<pre> -object[key] = newValue; -</pre> - -<p>it is translated to a call to <code>setObject:forKeyedSubscript:</code></p> - -<pre> -[object setObject:newValue forKeyedSubscript:key]; -</pre> - -<p>The behavior of <code>setObject:forKeyedSubscript:</code> is class-specific; but in general it should replace an existing value if one is already associated with a key, otherwise it should add a new value for the key. No syntax is provided for removing elements from mutable dictionaries.</p> - -<h3>Discussion</h3> - -<p>An Objective-C subscript expression occurs when the base operand of the C subscript operator has an Objective-C object pointer type. Since this potentially collides with pointer arithmetic on the value, these expressions are only supported under the modern Objective-C runtime, which categorically forbids such arithmetic.</p> - -<p>Currently, only subscripts of integral or Objective-C object pointer type are supported. 
In C++, a class type can be used if it has a single conversion function to an integral or Objective-C pointer type, in which case that conversion is applied and analysis continues as appropriate. Otherwise, the expression is ill-formed.</p> - -<p>An Objective-C object subscript expression is always an l-value. If the expression appears on the left-hand side of a simple assignment operator (=), the element is written as described below. If the expression appears on the left-hand side of a compound assignment operator (e.g. +=), the program is ill-formed, because the result of reading an element is always an Objective-C object pointer and no binary operators are legal on such pointers. If the expression appears in any other position, the element is read as described below. It is an error to take the address of a subscript expression, or (in C++) to bind a reference to it.</p> - -<p>Programs can use object subscripting with Objective-C object pointers of type <code>id</code>. Normal dynamic message send rules apply; the compiler must see <i>some</i> declaration of the subscripting methods, and will pick the declaration seen first.</p> - -<h2>Caveats</h2> - -<p>Objects created using the literal or boxed expression syntax are not guaranteed to be uniqued by the runtime, but nor are they guaranteed to be newly-allocated. As such, the result of performing direct comparisons against the location of an object literal (using <code>==</code>, <code>!=</code>, <code><</code>, <code><=</code>, <code>></code>, or <code>>=</code>) is not well-defined. This is usually a simple mistake in code that intended to call the <code>isEqual:</code> method (or the <code>compare:</code> method).</p> - -<p>This caveat applies to compile-time string literals as well. Historically, string literals (using the <code>@"..."</code> syntax) have been uniqued across translation units during linking. This is an implementation detail of the compiler and should not be relied upon. 
If you are using such code, please use global string constants instead (<code>NSString * const MyConst = @"..."</code>) or use <code>isEqual:</code>.</p> - -<h2>Grammar Additions</h2> - -<p>To support the new syntax described above, the Objective-C <code>@</code>-expression grammar has the following new productions:</p> - -<pre> -objc-at-expression : '@' (string-literal | encode-literal | selector-literal | protocol-literal | object-literal) - ; - -object-literal : ('+' | '-')? numeric-constant - | character-constant - | boolean-constant - | array-literal - | dictionary-literal - ; - -boolean-constant : '__objc_yes' | '__objc_no' | 'true' | 'false' /* boolean keywords. */ - ; - -array-literal : '[' assignment-expression-list ']' - ; - -assignment-expression-list : assignment-expression (',' assignment-expression-list)? - | /* empty */ - ; - -dictionary-literal : '{' key-value-list '}' - ; - -key-value-list : key-value-pair (',' key-value-list)? - | /* empty */ - ; - -key-value-pair : assignment-expression ':' assignment-expression - ; -</pre> - -<p>Note: <code>@true</code> and <code>@false</code> are only supported in Objective-C++.</p> - -<h2>Availability Checks</h2> - -<p>Programs test for the new features by using clang's __has_feature checks. Here are examples of their use:</p> - -<pre> -#if __has_feature(objc_array_literals) - // new way. - NSArray *elements = @[ @"H", @"He", @"O", @"C" ]; -#else - // old way (equivalent). - id objects[] = { @"H", @"He", @"O", @"C" }; - NSArray *elements = [NSArray arrayWithObjects:objects count:4]; -#endif - -#if __has_feature(objc_dictionary_literals) - // new way. - NSDictionary *masses = @{ @"H" : @1.0078, @"He" : @4.0026, @"O" : @15.9990, @"C" : @12.0096 }; -#else - // old way (equivalent). 
- id keys[] = { @"H", @"He", @"O", @"C" }; - id values[] = { [NSNumber numberWithDouble:1.0078], [NSNumber numberWithDouble:4.0026], - [NSNumber numberWithDouble:15.9990], [NSNumber numberWithDouble:12.0096] }; - NSDictionary *masses = [NSDictionary dictionaryWithObjects:objects forKeys:keys count:4]; -#endif - -#if __has_feature(objc_subscripting) - NSUInteger i, count = elements.count; - for (i = 0; i < count; ++i) { - NSString *element = elements[i]; - NSNumber *mass = masses[element]; - NSLog(@"the mass of %@ is %@", element, mass); - } -#else - NSUInteger i, count = [elements count]; - for (i = 0; i < count; ++i) { - NSString *element = [elements objectAtIndex:i]; - NSNumber *mass = [masses objectForKey:element]; - NSLog(@"the mass of %@ is %@", element, mass); - } -#endif -</pre> - -<p>Code can use also <code>__has_feature(objc_bool)</code> to check for the availability of numeric literals support. This checks for the new <code>__objc_yes / __objc_no</code> keywords, which enable the use of <code>@YES / @NO</code> literals.</p> - -<p>To check whether boxed expressions are supported, use <code>__has_feature(objc_boxed_expressions)</code> feature macro.</p> - -</div> -</body> -</html> diff --git a/docs/ObjectiveCLiterals.rst b/docs/ObjectiveCLiterals.rst new file mode 100644 index 0000000..92e4fb65 --- /dev/null +++ b/docs/ObjectiveCLiterals.rst @@ -0,0 +1,554 @@ +==================== +Objective-C Literals +==================== + +Introduction +============ + +Three new features were introduced into clang at the same time: +*NSNumber Literals* provide a syntax for creating ``NSNumber`` from +scalar literal expressions; *Collection Literals* provide a short-hand +for creating arrays and dictionaries; *Object Subscripting* provides a +way to use subscripting with Objective-C objects. Users of Apple +compiler releases can use these features starting with the Apple LLVM +Compiler 4.0. 
Users of open-source LLVM.org compiler releases can use +these features starting with clang v3.1. + +These language additions simplify common Objective-C programming +patterns, make programs more concise, and improve the safety of +container creation. + +This document describes how the features are implemented in clang, and +how to use them in your own programs. + +NSNumber Literals +================= + +The framework class ``NSNumber`` is used to wrap scalar values inside +objects: signed and unsigned integers (``char``, ``short``, ``int``, +``long``, ``long long``), floating point numbers (``float``, +``double``), and boolean values (``BOOL``, C++ ``bool``). Scalar values +wrapped in objects are also known as *boxed* values. + +In Objective-C, any character, numeric or boolean literal prefixed with +the ``'@'`` character will evaluate to a pointer to an ``NSNumber`` +object initialized with that value. C's type suffixes may be used to +control the size of numeric literals. + +Examples +-------- + +The following program illustrates the rules for ``NSNumber`` literals: + +.. code-block:: objc + + void main(int argc, const char *argv[]) { + // character literals. + NSNumber *theLetterZ = @'Z'; // equivalent to [NSNumber numberWithChar:'Z'] + + // integral literals. + NSNumber *fortyTwo = @42; // equivalent to [NSNumber numberWithInt:42] + NSNumber *fortyTwoUnsigned = @42U; // equivalent to [NSNumber numberWithUnsignedInt:42U] + NSNumber *fortyTwoLong = @42L; // equivalent to [NSNumber numberWithLong:42L] + NSNumber *fortyTwoLongLong = @42LL; // equivalent to [NSNumber numberWithLongLong:42LL] + + // floating point literals. + NSNumber *piFloat = @3.141592654F; // equivalent to [NSNumber numberWithFloat:3.141592654F] + NSNumber *piDouble = @3.1415926535; // equivalent to [NSNumber numberWithDouble:3.1415926535] + + // BOOL literals. 
+ NSNumber *yesNumber = @YES; // equivalent to [NSNumber numberWithBool:YES] + NSNumber *noNumber = @NO; // equivalent to [NSNumber numberWithBool:NO] + + #ifdef __cplusplus + NSNumber *trueNumber = @true; // equivalent to [NSNumber numberWithBool:(BOOL)true] + NSNumber *falseNumber = @false; // equivalent to [NSNumber numberWithBool:(BOOL)false] + #endif + } + +Discussion +---------- + +NSNumber literals only support literal scalar values after the ``'@'``. +Consequently, ``@INT_MAX`` works, but ``@INT_MIN`` does not, because +they are defined like this: + +.. code-block:: objc + + #define INT_MAX 2147483647 /* max value for an int */ + #define INT_MIN (-2147483647-1) /* min value for an int */ + +The definition of ``INT_MIN`` is not a simple literal, but a +parenthesized expression. Parenthesized expressions are supported using +the `boxed expression <#objc_boxed_expressions>`_ syntax, which is +described in the next section. + +Because ``NSNumber`` does not currently support wrapping ``long double`` +values, the use of a ``long double NSNumber`` literal (e.g. +``@123.23L``) will be rejected by the compiler. + +Previously, the ``BOOL`` type was simply a typedef for ``signed char``, +and ``YES`` and ``NO`` were macros that expand to ``(BOOL)1`` and +``(BOOL)0`` respectively. To support ``@YES`` and ``@NO`` expressions, +these macros are now defined using new language keywords in +``<objc/objc.h>``: + +.. code-block:: objc + + #if __has_feature(objc_bool) + #define YES __objc_yes + #define NO __objc_no + #else + #define YES ((BOOL)1) + #define NO ((BOOL)0) + #endif + +The compiler implicitly converts ``__objc_yes`` and ``__objc_no`` to +``(BOOL)1`` and ``(BOOL)0``. The keywords are used to disambiguate +``BOOL`` and integer literals. + +Objective-C++ also supports ``@true`` and ``@false`` expressions, which +are equivalent to ``@YES`` and ``@NO``. + +Boxed Expressions +================= + +Objective-C provides a new syntax for boxing C expressions: + +.. 
code-block:: objc + + @( <expression> ) + +Expressions of scalar (numeric, enumerated, BOOL) and C string pointer +types are supported: + +.. code-block:: objc + + // numbers. + NSNumber *smallestInt = @(-INT_MAX - 1); // [NSNumber numberWithInt:(-INT_MAX - 1)] + NSNumber *piOverTwo = @(M_PI / 2); // [NSNumber numberWithDouble:(M_PI / 2)] + + // enumerated types. + typedef enum { Red, Green, Blue } Color; + NSNumber *favoriteColor = @(Green); // [NSNumber numberWithInt:((int)Green)] + + // strings. + NSString *path = @(getenv("PATH")); // [NSString stringWithUTF8String:(getenv("PATH"))] + NSArray *pathComponents = [path componentsSeparatedByString:@":"]; + +Boxed Enums +----------- + +Cocoa frameworks frequently define constant values using *enums.* +Although enum values are integral, they may not be used directly as +boxed literals (this avoids conflicts with future ``'@'``-prefixed +Objective-C keywords). Instead, an enum value must be placed inside a +boxed expression. The following example demonstrates configuring an +``AVAudioRecorder`` using a dictionary that contains a boxed enumeration +value: + +.. code-block:: objc + + enum { + AVAudioQualityMin = 0, + AVAudioQualityLow = 0x20, + AVAudioQualityMedium = 0x40, + AVAudioQualityHigh = 0x60, + AVAudioQualityMax = 0x7F + }; + + - (AVAudioRecorder *)recordToFile:(NSURL *)fileURL { + NSDictionary *settings = @{ AVEncoderAudioQualityKey : @(AVAudioQualityMax) }; + return [[AVAudioRecorder alloc] initWithURL:fileURL settings:settings error:NULL]; + } + +The expression ``@(AVAudioQualityMax)`` converts ``AVAudioQualityMax`` +to an integer type, and boxes the value accordingly. If the enum has a +:ref:`fixed underlying type <objc-fixed-enum>` as in: + +.. 
code-block:: objc + + typedef enum : unsigned char { Red, Green, Blue } Color; + NSNumber *red = @(Red), *green = @(Green), *blue = @(Blue); // => [NSNumber numberWithUnsignedChar:] + +then the fixed underlying type will be used to select the correct +``NSNumber`` creation method. + +Boxing a value of enum type will result in an ``NSNumber`` pointer with a +creation method according to the underlying type of the enum, which can +be a :ref:`fixed underlying type <objc-fixed-enum>` +or a compiler-defined integer type capable of representing the values of +all the members of the enumeration: + +.. code-block:: objc + + typedef enum : unsigned char { Red, Green, Blue } Color; + Color col = Red; + NSNumber *nsCol = @(col); // => [NSNumber numberWithUnsignedChar:] + +Boxed C Strings +--------------- + +A C string literal prefixed by the ``'@'`` token denotes an ``NSString`` +literal in the same way a numeric literal prefixed by the ``'@'`` token +denotes an ``NSNumber`` literal. When the type of the parenthesized +expression is ``(char *)`` or ``(const char *)``, the result of the +boxed expression is a pointer to an ``NSString`` object containing +equivalent character data, which is assumed to be '\\0'-terminated and +UTF-8 encoded. The following example converts C-style command line +arguments into ``NSString`` objects. + +.. code-block:: objc + + // Partition command line arguments into positional and option arguments. + NSMutableArray *args = [NSMutableArray new]; + NSMutableDictionary *options = [NSMutableDictionary new]; + while (--argc) { + const char *arg = *++argv; + if (strncmp(arg, "--", 2) == 0) { + options[@(arg + 2)] = @(*++argv); // --key value + } else { + [args addObject:@(arg)]; // positional argument + } + } + +As with all C pointers, character pointer expressions can involve +arbitrary pointer arithmetic; therefore programmers must ensure that the +character data is valid. Passing ``NULL`` as the character pointer will +raise an exception at runtime. 
When possible, the compiler will reject +``NULL`` character pointers used in boxed expressions. + +Availability +------------ + +Boxed expressions will be available in clang 3.2. They are not currently +available in any Apple compiler. + +Container Literals +================== + +Objective-C now supports a new expression syntax for creating immutable +array and dictionary container objects. + +Examples +-------- + +Immutable array expression: + +.. code-block:: objc + + NSArray *array = @[ @"Hello", NSApp, [NSNumber numberWithInt:42] ]; + +This creates an ``NSArray`` with 3 elements. The comma-separated +sub-expressions of an array literal can be any Objective-C object +pointer typed expression. + +Immutable dictionary expression: + +.. code-block:: objc + + NSDictionary *dictionary = @{ + @"name" : NSUserName(), + @"date" : [NSDate date], + @"processInfo" : [NSProcessInfo processInfo] + }; + +This creates an ``NSDictionary`` with 3 key/value pairs. Value +sub-expressions of a dictionary literal must be Objective-C object +pointer typed, as in array literals. Key sub-expressions must be of an +Objective-C object pointer type that implements the +``<NSCopying>`` protocol. + +Discussion +---------- + +Neither keys nor values can have the value ``nil`` in containers. If the +compiler can prove that a key or value is ``nil`` at compile time, then +a warning will be emitted. Otherwise, a runtime error will occur. + +Using array and dictionary literals is safer than the variadic creation +forms commonly in use today. Array literal expressions expand to calls +to ``+[NSArray arrayWithObjects:count:]``, which validates that all +objects are non-``nil``. The variadic form, +``+[NSArray arrayWithObjects:]``, uses ``nil`` as an argument list +terminator, which can lead to malformed array objects. 
Dictionary +literals are similarly created with +``+[NSDictionary dictionaryWithObjects:forKeys:count:]`` which validates +all objects and keys, unlike +``+[NSDictionary dictionaryWithObjectsAndKeys:]`` which also uses a +``nil`` parameter as an argument list terminator. + +Object Subscripting +=================== + +Objective-C object pointer values can now be used with C's subscripting +operator. + +Examples +-------- + +The following code demonstrates the use of object subscripting syntax +with ``NSMutableArray`` and ``NSMutableDictionary`` objects: + +.. code-block:: objc + + NSMutableArray *array = ...; + NSUInteger idx = ...; + id newObject = ...; + id oldObject = array[idx]; + array[idx] = newObject; // replace oldObject with newObject + + NSMutableDictionary *dictionary = ...; + NSString *key = ...; + oldObject = dictionary[key]; + dictionary[key] = newObject; // replace oldObject with newObject + +The next section explains how subscripting expressions map to accessor +methods. + +Subscripting Methods +-------------------- + +Objective-C supports two kinds of subscript expressions: *array-style* +subscript expressions use integer typed subscripts; *dictionary-style* +subscript expressions use Objective-C object pointer typed subscripts. +Each type of subscript expression is mapped to a message send using a +predefined selector. The advantage of this design is flexibility: class +designers are free to introduce subscripting by declaring methods or by +adopting protocols. Moreover, because the method names are selected by +the type of the subscript, an object can be subscripted using both array +and dictionary styles. + +Array-Style Subscripting +^^^^^^^^^^^^^^^^^^^^^^^^ + +When the subscript operand has an integral type, the expression is +rewritten to use one of two different selectors, depending on whether +the element is being read or written. When an expression reads an +element using an integral index, as in the following example: + +.. 
code-block:: objc + + NSUInteger idx = ...; + id value = object[idx]; + +it is translated into a call to ``objectAtIndexedSubscript:`` + +.. code-block:: objc + + id value = [object objectAtIndexedSubscript:idx]; + +When an expression writes an element using an integral index: + +.. code-block:: objc + + object[idx] = newValue; + +it is translated to a call to ``setObject:atIndexedSubscript:`` + +.. code-block:: objc + + [object setObject:newValue atIndexedSubscript:idx]; + +These message sends are then type-checked and performed just like +explicit message sends. The method used for objectAtIndexedSubscript: +must be declared with an argument of integral type and a return value of +some Objective-C object pointer type. The method used for +setObject:atIndexedSubscript: must be declared with its first argument +having some Objective-C pointer type and its second argument having +integral type. + +The meaning of indexes is left up to the declaring class. The compiler +will coerce the index to the appropriate argument type of the method it +uses for type-checking. For an instance of ``NSArray``, reading an +element using an index outside the range ``[0, array.count)`` will raise +an exception. For an instance of ``NSMutableArray``, assigning to an +element using an index within this range will replace that element, but +assigning to an element using an index outside this range will raise an +exception; no syntax is provided for inserting, appending, or removing +elements for mutable arrays. + +A class need not declare both methods in order to take advantage of this +language feature. For example, the class ``NSArray`` declares only +``objectAtIndexedSubscript:``, so that assignments to elements will fail +to type-check; moreover, its subclass ``NSMutableArray`` declares +``setObject:atIndexedSubscript:``. 
+ +Dictionary-Style Subscripting +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When the subscript operand has an Objective-C object pointer type, the +expression is rewritten to use one of two different selectors, depending +on whether the element is being read from or written to. When an +expression reads an element using an Objective-C object pointer +subscript operand, as in the following example: + +.. code-block:: objc + + id key = ...; + id value = object[key]; + +it is translated into a call to the ``objectForKeyedSubscript:`` method: + +.. code-block:: objc + + id value = [object objectForKeyedSubscript:key]; + +When an expression writes an element using an Objective-C object pointer +subscript: + +.. code-block:: objc + + object[key] = newValue; + +it is translated to a call to ``setObject:forKeyedSubscript:`` + +.. code-block:: objc + + [object setObject:newValue forKeyedSubscript:key]; + +The behavior of ``setObject:forKeyedSubscript:`` is class-specific; but +in general it should replace an existing value if one is already +associated with a key, otherwise it should add a new value for the key. +No syntax is provided for removing elements from mutable dictionaries. + +Discussion +---------- + +An Objective-C subscript expression occurs when the base operand of the +C subscript operator has an Objective-C object pointer type. Since this +potentially collides with pointer arithmetic on the value, these +expressions are only supported under the modern Objective-C runtime, +which categorically forbids such arithmetic. + +Currently, only subscripts of integral or Objective-C object pointer +type are supported. In C++, a class type can be used if it has a single +conversion function to an integral or Objective-C pointer type, in which +case that conversion is applied and analysis continues as appropriate. +Otherwise, the expression is ill-formed. + +An Objective-C object subscript expression is always an l-value. 
If the +expression appears on the left-hand side of a simple assignment operator +(=), the element is written as described below. If the expression +appears on the left-hand side of a compound assignment operator (e.g. ++=), the program is ill-formed, because the result of reading an element +is always an Objective-C object pointer and no binary operators are +legal on such pointers. If the expression appears in any other position, +the element is read as described below. It is an error to take the +address of a subscript expression, or (in C++) to bind a reference to +it. + +Programs can use object subscripting with Objective-C object pointers of +type ``id``. Normal dynamic message send rules apply; the compiler must +see *some* declaration of the subscripting methods, and will pick the +declaration seen first. + +Caveats +======= + +Objects created using the literal or boxed expression syntax are not +guaranteed to be uniqued by the runtime, but nor are they guaranteed to +be newly-allocated. As such, the result of performing direct comparisons +against the location of an object literal (using ``==``, ``!=``, ``<``, +``<=``, ``>``, or ``>=``) is not well-defined. This is usually a simple +mistake in code that intended to call the ``isEqual:`` method (or the +``compare:`` method). + +This caveat applies to compile-time string literals as well. +Historically, string literals (using the ``@"..."`` syntax) have been +uniqued across translation units during linking. This is an +implementation detail of the compiler and should not be relied upon. If +you are using such code, please use global string constants instead +(``NSString * const MyConst = @"..."``) or use ``isEqual:``. 
+ +Grammar Additions +================= + +To support the new syntax described above, the Objective-C +``@``-expression grammar has the following new productions: + +:: + + objc-at-expression : '@' (string-literal | encode-literal | selector-literal | protocol-literal | object-literal) + ; + + object-literal : ('+' | '-')? numeric-constant + | character-constant + | boolean-constant + | array-literal + | dictionary-literal + ; + + boolean-constant : '__objc_yes' | '__objc_no' | 'true' | 'false' /* boolean keywords. */ + ; + + array-literal : '[' assignment-expression-list ']' + ; + + assignment-expression-list : assignment-expression (',' assignment-expression-list)? + | /* empty */ + ; + + dictionary-literal : '{' key-value-list '}' + ; + + key-value-list : key-value-pair (',' key-value-list)? + | /* empty */ + ; + + key-value-pair : assignment-expression ':' assignment-expression + ; + +Note: ``@true`` and ``@false`` are only supported in Objective-C++. + +Availability Checks +=================== + +Programs test for the new features by using clang's \_\_has\_feature +checks. Here are examples of their use: + +.. code-block:: objc + + #if __has_feature(objc_array_literals) + // new way. + NSArray *elements = @[ @"H", @"He", @"O", @"C" ]; + #else + // old way (equivalent). + id objects[] = { @"H", @"He", @"O", @"C" }; + NSArray *elements = [NSArray arrayWithObjects:objects count:4]; + #endif + + #if __has_feature(objc_dictionary_literals) + // new way. + NSDictionary *masses = @{ @"H" : @1.0078, @"He" : @4.0026, @"O" : @15.9990, @"C" : @12.0096 }; + #else + // old way (equivalent). 
+ id keys[] = { @"H", @"He", @"O", @"C" }; + id values[] = { [NSNumber numberWithDouble:1.0078], [NSNumber numberWithDouble:4.0026], + [NSNumber numberWithDouble:15.9990], [NSNumber numberWithDouble:12.0096] }; + NSDictionary *masses = [NSDictionary dictionaryWithObjects:values forKeys:keys count:4]; + #endif + + #if __has_feature(objc_subscripting) + NSUInteger i, count = elements.count; + for (i = 0; i < count; ++i) { + NSString *element = elements[i]; + NSNumber *mass = masses[element]; + NSLog(@"the mass of %@ is %@", element, mass); + } + #else + NSUInteger i, count = [elements count]; + for (i = 0; i < count; ++i) { + NSString *element = [elements objectAtIndex:i]; + NSNumber *mass = [masses objectForKey:element]; + NSLog(@"the mass of %@ is %@", element, mass); + } + #endif + +Code can also use ``__has_feature(objc_bool)`` to check for the +availability of numeric literals support. This checks for the new +``__objc_yes / __objc_no`` keywords, which enable the use of +``@YES / @NO`` literals. + +To check whether boxed expressions are supported, use the +``__has_feature(objc_boxed_expressions)`` feature macro. diff --git a/docs/PCHInternals.html b/docs/PCHInternals.html deleted file mode 100644 index 7fed5ba..0000000 --- a/docs/PCHInternals.html +++ /dev/null @@ -1,658 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <title>Precompiled Header and Modules Internals</title> - <link type="text/css" rel="stylesheet" href="../menu.css"> - <link type="text/css" rel="stylesheet" href="../content.css"> - <style type="text/css"> - td { - vertical-align: top; - } - </style> -</head> - -<body> - -<!--#include virtual="../menu.html.incl"--> - -<div id="content"> - -<h1>Precompiled Header and Modules Internals</h1> - - <p>This document describes the design and implementation of Clang's - precompiled headers (PCH) and modules. 
If you are interested in the end-user - view, please see the <a - href="UsersManual.html#precompiledheaders">User's Manual</a>.</p> - - <p><b>Table of Contents</b></p> - <ul> - <li><a href="#usage">Using Precompiled Headers with - <tt>clang</tt></a></li> - <li><a href="#philosophy">Design Philosophy</a></li> - <li><a href="#contents">Serialized AST File Contents</a> - <ul> - <li><a href="#metadata">Metadata Block</a></li> - <li><a href="#sourcemgr">Source Manager Block</a></li> - <li><a href="#preprocessor">Preprocessor Block</a></li> - <li><a href="#types">Types Block</a></li> - <li><a href="#decls">Declarations Block</a></li> - <li><a href="#stmt">Statements and Expressions</a></li> - <li><a href="#idtable">Identifier Table Block</a></li> - <li><a href="#method-pool">Method Pool Block</a></li> - </ul> - </li> - <li><a href="#tendrils">AST Reader Integration Points</a></li> - <li><a href="#chained">Chained precompiled headers</a></li> - <li><a href="#modules">Modules</a></li> -</ul> - -<h2 id="usage">Using Precompiled Headers with <tt>clang</tt></h2> - -<p>The Clang compiler frontend, <tt>clang -cc1</tt>, supports two command line -options for generating and using PCH files.<p> - -<p>To generate PCH files using <tt>clang -cc1</tt>, use the option -<b><tt>-emit-pch</tt></b>: - -<pre> $ clang -cc1 test.h -emit-pch -o test.h.pch </pre> - -<p>This option is transparently used by <tt>clang</tt> when generating -PCH files. The resulting PCH file contains the serialized form of the -compiler's internal representation after it has completed parsing and -semantic analysis. The PCH file can then be used as a prefix header -with the <b><tt>-include-pch</tt></b> option:</p> - -<pre> - $ clang -cc1 -include-pch test.h.pch test.c -o test.s -</pre> - -<h2 id="philosophy">Design Philosophy</h2> - -<p>Precompiled headers are meant to improve overall compile times for - projects, so the design of precompiled headers is entirely driven by - performance concerns. 
The use case for precompiled headers is - relatively simple: when there is a common set of headers that is - included in nearly every source file in the project, we - <i>precompile</i> that bundle of headers into a single precompiled - header (PCH file). Then, when compiling the source files in the - project, we load the PCH file first (as a prefix header), which acts - as a stand-in for that bundle of headers.</p> - -<p>A precompiled header implementation improves performance when:</p> -<ul> - <li>Loading the PCH file is significantly faster than re-parsing the - bundle of headers stored within the PCH file. Thus, a precompiled - header design attempts to minimize the cost of reading the PCH - file. Ideally, this cost should not vary with the size of the - precompiled header file.</li> - - <li>The cost of generating the PCH file initially is not so large - that it counters the per-source-file performance improvement due to - eliminating the need to parse the bundled headers in the first - place. This is particularly important on multi-core systems, because - PCH file generation serializes the build when all compilations - require the PCH file to be up-to-date.</li> -</ul> - -<p>Modules, as implemented in Clang, use the same mechanisms as -precompiled headers to save a serialized AST file (one per module) and -use those AST modules. From an implementation standpoint, modules are -a generalization of precompiled headers, lifting a number of -restrictions placed on precompiled headers. In particular, there can -only be one precompiled header and it must be included at the -beginning of the translation unit. The extensions to the AST file -format required for modules are discussed in the section on <a href="#modules">modules</a>.</p> - -<p>Clang's AST files are designed with a compact on-disk -representation, which minimizes both creation time and the time -required to initially load the AST file. 
The AST file itself contains -a serialized representation of Clang's abstract syntax trees and -supporting data structures, stored using the same compressed bitstream -as <a href="http://llvm.org/docs/BitCodeFormat.html">LLVM's bitcode -file format</a>.</p> - -<p>Clang's AST files are loaded "lazily" from disk. When an -AST file is initially loaded, Clang reads only a small amount of data -from the AST file to establish where certain important data structures -are stored. The amount of data read in this initial load is -independent of the size of the AST file, such that a larger AST file -does not lead to longer AST load times. The actual header data in the -AST file--macros, functions, variables, types, etc.--is loaded only -when it is referenced from the user's code, at which point only that -entity (and those entities it depends on) are deserialized from the -AST file. With this approach, the cost of using an AST file -for a translation unit is proportional to the amount of code actually -used from the AST file, rather than being proportional to the size of -the AST file itself.</p> - -<p>When given the <code>-print-stats</code> option, Clang produces -statistics describing how much of the AST file was actually -loaded from disk. For a simple "Hello, World!" 
program that includes -the Apple <code>Cocoa.h</code> header (which is built as a precompiled -header), this option illustrates how little of the actual precompiled -header is required:</p> - -<pre> -*** PCH Statistics: - 933 stat cache hits - 4 stat cache misses - 895/39981 source location entries read (2.238563%) - 19/15315 types read (0.124061%) - 20/82685 declarations read (0.024188%) - 154/58070 identifiers read (0.265197%) - 0/7260 selectors read (0.000000%) - 0/30842 statements read (0.000000%) - 4/8400 macros read (0.047619%) - 1/4995 lexical declcontexts read (0.020020%) - 0/4413 visible declcontexts read (0.000000%) - 0/7230 method pool entries read (0.000000%) - 0 method pool misses -</pre> - -<p>For this small program, only a tiny fraction of the source -locations, types, declarations, identifiers, and macros were actually -deserialized from the precompiled header. These statistics can be -useful to determine whether the AST file implementation can -be improved by making more of the implementation lazy.</p> - -<p>Precompiled headers can be chained. When you create a PCH while -including an existing PCH, Clang can create the new PCH by referencing -the original file and only writing the new data to the new file. For -example, you could create a PCH out of all the headers that are very -commonly used throughout your project, and then create a PCH for every -single source file in the project that includes the code that is -specific to that file, so that recompiling the file itself is very fast, -without duplicating the data from the common headers for every -file. The mechanisms behind chained precompiled headers are discussed -in a <a href="#chained">later section</a>. 
- -<h2 id="contents">AST File Contents</h2> - -<img src="PCHLayout.png" style="float:right" alt="Precompiled header layout"> - -<p>Clang's AST files are organized into several different -blocks, each of which contains the serialized representation of a part -of Clang's internal representation. Each of the blocks corresponds to -either a block or a record within <a - href="http://llvm.org/docs/BitCodeFormat.html">LLVM's bitstream -format</a>. The contents of each of these logical blocks are described -below.</p> - -<p>For a given AST file, the <a -href="http://llvm.org/cmds/llvm-bcanalyzer.html"><code>llvm-bcanalyzer</code></a> -utility can be used to examine the actual structure of the bitstream -for the AST file. This information can be used both to help -understand the structure of the AST file and to isolate -areas where AST files can still be optimized, e.g., through -the introduction of abbreviations.</p> - -<h3 id="metadata">Metadata Block</h3> - -<p>The metadata block contains several records that provide -information about how the AST file was built. This metadata -is primarily used to validate the use of an AST file. For -example, a precompiled header built for a 32-bit x86 target cannot be used -when compiling for a 64-bit x86 target. The metadata block contains -information about:</p> - -<dl> - <dt>Language options</dt> - <dd>Describes the particular language dialect used to compile the -AST file, including major options (e.g., Objective-C support) and more -minor options (e.g., support for "//" comments). The contents of this -record correspond to the <code>LangOptions</code> class.</dd> - - <dt>Target architecture</dt> - <dd>The target triple that describes the architecture, platform, and -ABI for which the AST file was generated, e.g., -<code>i386-apple-darwin9</code>.</dd> - - <dt>AST version</dt> - <dd>The major and minor version numbers of the AST file -format. 
Changes in the minor version number should not affect backward -compatibility, while changes in the major version number imply that a -newer compiler cannot read an older precompiled header (and -vice-versa).</dd> - - <dt>Original file name</dt> - <dd>The full path of the header that was used to generate the -AST file.</dd> - - <dt>Predefines buffer</dt> - <dd>Although not explicitly stored as part of the metadata, the -predefines buffer is used in the validation of the AST file. -The predefines buffer itself contains code generated by the compiler -to initialize the preprocessor state according to the current target, -platform, and command-line options. For example, the predefines buffer -will contain "<code>#define __STDC__ 1</code>" when we are compiling C -without Microsoft extensions. The predefines buffer itself is stored -within the <a href="#sourcemgr">source manager block</a>, but its -contents are verified along with the rest of the metadata.</dd> - -</dl> - -<p>A chained PCH file (that is, one that references another PCH) and a -module (which may import other modules) have additional metadata -containing the list of all AST files that this AST file depends -on. Each of those files will be loaded along with this AST file.</p> - -<p>For chained precompiled headers, the language options, target -architecture and predefines buffer data is taken from the end of the -chain, since they have to match anyway.</p> - -<h3 id="sourcemgr">Source Manager Block</h3> - -<p>The source manager block contains the serialized representation of -Clang's <a - href="InternalsManual.html#SourceLocation">SourceManager</a> class, -which handles the mapping from source locations (as represented in -Clang's abstract syntax tree) into actual column/line positions within -a source file or macro instantiation. 
The AST file's -representation of the source manager also includes information about -all of the headers that were (transitively) included when building the -AST file.</p> - -<p>The bulk of the source manager block is dedicated to information -about the various files, buffers, and macro instantiations into which -a source location can refer. Each of these is referenced by a numeric -"file ID", which is a unique number (allocated starting at 1) stored -in the source location. Clang serializes the information for each kind -of file ID, along with an index that maps file IDs to the position -within the AST file where the information about that file ID is -stored. The data associated with a file ID is loaded only when -required by the front end, e.g., to emit a diagnostic that includes a -macro instantiation history inside the header itself.</p> - -<p>The source manager block also contains information about all of the -headers that were included when building the AST file. This -includes information about the controlling macro for the header (e.g., -when the preprocessor identified that the contents of the header -dependent on a macro like <code>LLVM_CLANG_SOURCEMANAGER_H</code>) -along with a cached version of the results of the <code>stat()</code> -system calls performed when building the AST file. The -latter is particularly useful in reducing system time when searching -for include files.</p> - -<h3 id="preprocessor">Preprocessor Block</h3> - -<p>The preprocessor block contains the serialized representation of -the preprocessor. Specifically, it contains all of the macros that -have been defined by the end of the header used to build the -AST file, along with the token sequences that comprise each -macro. The macro definitions are only read from the AST file when the -name of the macro first occurs in the program. 
This lazy loading of -macro definitions is triggered by lookups into the <a - href="#idtable">identifier table</a>.</p> - -<h3 id="types">Types Block</h3> - -<p>The types block contains the serialized representation of all of -the types referenced in the translation unit. Each Clang type node -(<code>PointerType</code>, <code>FunctionProtoType</code>, etc.) has a -corresponding record type in the AST file. When types are deserialized -from the AST file, the data within the record is used to -reconstruct the appropriate type node using the AST context.</p> - -<p>Each type has a unique type ID, which is an integer that uniquely -identifies that type. Type ID 0 represents the NULL type, type IDs -less than <code>NUM_PREDEF_TYPE_IDS</code> represent predefined types -(<code>void</code>, <code>float</code>, etc.), while other -"user-defined" type IDs are assigned consecutively from -<code>NUM_PREDEF_TYPE_IDS</code> upward as the types are encountered. -The AST file has an associated mapping from the user-defined types -block to the location within the types block where the serialized -representation of that type resides, enabling lazy deserialization of -types. When a type is referenced from within the AST file, that -reference is encoded using the type ID shifted left by 3 bits. The -lower three bits are used to represent the <code>const</code>, -<code>volatile</code>, and <code>restrict</code> qualifiers, as in -Clang's <a - href="http://clang.llvm.org/docs/InternalsManual.html#Type">QualType</a> -class.</p> - -<h3 id="decls">Declarations Block</h3> - -<p>The declarations block contains the serialized representation of -all of the declarations referenced in the translation unit. Each Clang -declaration node (<code>VarDecl</code>, <code>FunctionDecl</code>, -etc.) has a corresponding record type in the AST file. 
When -declarations are deserialized from the AST file, the data -within the record is used to build and populate a new instance of the -corresponding <code>Decl</code> node. As with types, each declaration -node has a numeric ID that is used to refer to that declaration within -the AST file. In addition, a lookup table provides a mapping from that -numeric ID to the offset within the precompiled header where that -declaration is described.</p> - -<p>Declarations in Clang's abstract syntax trees are stored -hierarchically. At the top of the hierarchy is the translation unit -(<code>TranslationUnitDecl</code>), which contains all of the -declarations in the translation unit but is not actually written as a -specific declaration node. Its child declarations (such as -functions or struct types) may also contain other declarations inside -them, and so on. Within Clang, each declaration is stored within a <a -href="http://clang.llvm.org/docs/InternalsManual.html#DeclContext">declaration -context</a>, as represented by the <code>DeclContext</code> class. -Declaration contexts provide the mechanism to perform name lookup -within a given declaration (e.g., find the member named <code>x</code> -in a structure) and iterate over the declarations stored within a -context (e.g., iterate over all of the fields of a structure for -structure layout).</p> - -<p>In Clang's AST file format, deserializing a declaration -that is a <code>DeclContext</code> is a separate operation from -deserializing all of the declarations stored within that declaration -context. Therefore, Clang will deserialize the translation unit -declaration without deserializing the declarations within that -translation unit. When required, the declarations stored within a -declaration context will be deserialized. 
There are two representations -of the declarations within a declaration context, which correspond to -the name-lookup and iteration behavior described above:</p> - -<ul> - <li>When the front end performs name lookup to find a name - <code>x</code> within a given declaration context (for example, - during semantic analysis of the expression <code>p->x</code>, - where <code>p</code>'s type is defined in the precompiled header), - Clang refers to an on-disk hash table that maps from the names - within that declaration context to the declaration IDs that - represent each visible declaration with that name. The actual - declarations will then be deserialized to provide the results of - name lookup.</li> - - <li>When the front end performs iteration over all of the - declarations within a declaration context, all of those declarations - are immediately de-serialized. For large declaration contexts (e.g., - the translation unit), this operation is expensive; however, large - declaration contexts are not traversed in normal compilation, since - such a traversal is unnecessary. However, it is common for the code - generator and semantic analysis to traverse declaration contexts for - structs, classes, unions, and enumerations, although those contexts - contain relatively few declarations in the common case.</li> -</ul> - -<h3 id="stmt">Statements and Expressions</h3> - -<p>Statements and expressions are stored in the AST file in -both the <a href="#types">types</a> and the <a - href="#decls">declarations</a> blocks, because every statement or -expression will be associated with either a type or declaration. The -actual statement and expression records are stored immediately -following the declaration or type that owns the statement or -expression. 
For example, the statement representing the body of a -function will be stored directly following the declaration of the -function.</p> - -<p>As with types and declarations, each statement and expression kind -in Clang's abstract syntax tree (<code>ForStmt</code>, -<code>CallExpr</code>, etc.) has a corresponding record type in the -AST file, which contains the serialized representation of -that statement or expression. Each substatement or subexpression -within an expression is stored as a separate record (which keeps most -records to a fixed size). Within the AST file, the -subexpressions of an expression are stored, in reverse order, prior to the expression -that owns those expression, using a form of <a -href="http://en.wikipedia.org/wiki/Reverse_Polish_notation">Reverse -Polish Notation</a>. For example, an expression <code>3 - 4 + 5</code> -would be represented as follows:</p> - -<table border="1"> - <tr><td><code>IntegerLiteral(5)</code></td></tr> - <tr><td><code>IntegerLiteral(4)</code></td></tr> - <tr><td><code>IntegerLiteral(3)</code></td></tr> - <tr><td><code>BinaryOperator(-)</code></td></tr> - <tr><td><code>BinaryOperator(+)</code></td></tr> - <tr><td>STOP</td></tr> -</table> - -<p>When reading this representation, Clang evaluates each expression -record it encounters, builds the appropriate abstract syntax tree node, -and then pushes that expression on to a stack. When a record contains <i>N</i> -subexpressions--<code>BinaryOperator</code> has two of them--those -expressions are popped from the top of the stack. 
The special STOP -code indicates that we have reached the end of a serialized expression -or statement; other expression or statement records may follow, but -they are part of a different expression.</p> - -<h3 id="idtable">Identifier Table Block</h3> - -<p>The identifier table block contains an on-disk hash table that maps -each identifier mentioned within the AST file to the -serialized representation of the identifier's information (e.g, the -<code>IdentifierInfo</code> structure). The serialized representation -contains:</p> - -<ul> - <li>The actual identifier string.</li> - <li>Flags that describe whether this identifier is the name of a - built-in, a poisoned identifier, an extension token, or a - macro.</li> - <li>If the identifier names a macro, the offset of the macro - definition within the <a href="#preprocessor">preprocessor - block</a>.</li> - <li>If the identifier names one or more declarations visible from - translation unit scope, the <a href="#decls">declaration IDs</a> of these - declarations.</li> -</ul> - -<p>When an AST file is loaded, the AST file reader -mechanism introduces itself into the identifier table as an external -lookup source. Thus, when the user program refers to an identifier -that has not yet been seen, Clang will perform a lookup into the -identifier table. If an identifier is found, its contents (macro -definitions, flags, top-level declarations, etc.) will be -deserialized, at which point the corresponding -<code>IdentifierInfo</code> structure will have the same contents it -would have after parsing the headers in the AST file.</p> - -<p>Within the AST file, the identifiers used to name declarations are represented with an integral value. A separate table provides a mapping from this integral value (the identifier ID) to the location within the on-disk -hash table where that identifier is stored. 
This mapping is used when -deserializing the name of a declaration, the identifier of a token, or -any other construct in the AST file that refers to a name.</p> - -<h3 id="method-pool">Method Pool Block</h3> - -<p>The method pool block is represented as an on-disk hash table that -serves two purposes: it provides a mapping from the names of -Objective-C selectors to the set of Objective-C instance and class -methods that have that particular selector (which is required for -semantic analysis in Objective-C) and also stores all of the selectors -used by entities within the AST file. The design of the -method pool is similar to that of the <a href="#idtable">identifier -table</a>: the first time a particular selector is formed during the -compilation of the program, Clang will search in the on-disk hash -table of selectors; if found, Clang will read the Objective-C methods -associated with that selector into the appropriate front-end data -structure (<code>Sema::InstanceMethodPool</code> and -<code>Sema::FactoryMethodPool</code> for instance and class methods, -respectively).</p> - -<p>As with identifiers, selectors are represented by numeric values -within the AST file. A separate index maps these numeric selector -values to the offset of the selector within the on-disk hash table, -and will be used when de-serializing an Objective-C method declaration -(or other Objective-C construct) that refers to the selector.</p> - -<h2 id="tendrils">AST Reader Integration Points</h2> - -<p>The "lazy" deserialization behavior of AST files requires -their integration into several completely different submodules of -Clang. For example, lazily deserializing the declarations during name -lookup requires that the name-lookup routines be able to query the -AST file to find entities stored there.</p> - -<p>For each Clang data structure that requires direct interaction with -the AST reader logic, there is an abstract class that provides -the interface between the two modules. 
The <code>ASTReader</code> -class, which handles the loading of an AST file, inherits -from all of these abstract classes to provide lazy deserialization of -Clang's data structures. <code>ASTReader</code> implements the -following abstract classes:</p> - -<dl> - <dt><code>StatSysCallCache</code></dt> - <dd>This abstract interface is associated with the - <code>FileManager</code> class, and is used whenever the file - manager is going to perform a <code>stat()</code> system call.</dd> - - <dt><code>ExternalSLocEntrySource</code></dt> - <dd>This abstract interface is associated with the - <code>SourceManager</code> class, and is used whenever the - <a href="#sourcemgr">source manager</a> needs to load the details - of a file, buffer, or macro instantiation.</dd> - - <dt><code>IdentifierInfoLookup</code></dt> - <dd>This abstract interface is associated with the - <code>IdentifierTable</code> class, and is used whenever the - program source refers to an identifier that has not yet been seen. - In this case, the AST reader searches for - this identifier within its <a href="#idtable">identifier table</a> - to load any top-level declarations or macros associated with that - identifier.</dd> - - <dt><code>ExternalASTSource</code></dt> - <dd>This abstract interface is associated with the - <code>ASTContext</code> class, and is used whenever the abstract - syntax tree nodes need to loaded from the AST file. 
It - provides the ability to de-serialize declarations and types - identified by their numeric values, read the bodies of functions - when required, and read the declarations stored within a - declaration context (either for iteration or for name lookup).</dd> - - <dt><code>ExternalSemaSource</code></dt> - <dd>This abstract interface is associated with the <code>Sema</code> - class, and is used whenever semantic analysis needs to read - information from the <a href="#methodpool">global method - pool</a>.</dd> -</dl> - -<h2 id="chained">Chained precompiled headers</h2> - -<p>Chained precompiled headers were initially intended to improve the -performance of IDE-centric operations such as syntax highlighting and -code completion while a particular source file is being edited by the -user. To minimize the amount of reparsing required after a change to -the file, a form of precompiled header--called a precompiled -<i>preamble</i>--is automatically generated by parsing all of the -headers in the source file, up to and including the last -#include. When only the source file changes (and none of the headers -it depends on), reparsing of that source file can use the precompiled -preamble and start parsing after the #includes, so parsing time is -proportional to the size of the source file (rather than all of its -includes). However, the compilation of that translation unit -may already use a precompiled header: in this case, Clang will create -the precompiled preamble as a chained precompiled header that refers -to the original precompiled header. This drastically reduces the time -needed to serialize the precompiled preamble for use in reparsing.</p> - -<p>Chained precompiled headers get their name because each precompiled header -can depend on one other precompiled header, forming a chain of -dependencies. A translation unit will then include the precompiled -header that starts the chain (i.e., nothing depends on it). 
This -linearity of dependencies is important for the semantic model of -chained precompiled headers, because the most-recent precompiled -header can provide information that overrides the information provided -by the precompiled headers it depends on, just like a header file -<code>B.h</code> that includes another header <code>A.h</code> can -modify the state produced by parsing <code>A.h</code>, e.g., by -<code>#undef</code>'ing a macro defined in <code>A.h</code>.</p> - -<p>There are several ways in which chained precompiled headers -generalize the AST file model:</p> - -<dl> - <dt>Numbering of IDs</dt> - <dd>Many different kinds of entities--identifiers, declarations, - types, etc.---have ID numbers that start at 1 or some other - predefined constant and grow upward. Each precompiled header records - the maximum ID number it has assigned in each category. Then, when a - new precompiled header is generated that depends on (chains to) - another precompiled header, it will start counting at the next - available ID number. This way, one can determine, given an ID - number, which AST file actually contains the entity.</dd> - - <dt>Name lookup</dt> - <dd>When writing a chained precompiled header, Clang attempts to - write only information that has changed from the precompiled header - on which it is based. This changes the lookup algorithm for the - various tables, such as the <a href="#idtable">identifier table</a>: - the search starts at the most-recent precompiled header. If no entry - is found, lookup then proceeds to the identifier table in the - precompiled header it depends on, and so one. Once a lookup - succeeds, that result is considered definitive, overriding any - results from earlier precompiled headers.</dd> - - <dt>Update records</dt> - <dd>There are various ways in which a later precompiled header can - modify the entities described in an earlier precompiled header. 
For - example, later precompiled headers can add entries into the various - name-lookup tables for the translation unit or namespaces, or add - new categories to an Objective-C class. Each of these updates is - captured in an "update record" that is stored in the chained - precompiled header file and will be loaded along with the original - entity.</dd> -</dl> - -<h2 id="modules">Modules</h2> - -<p>Modules generalize the chained precompiled header model yet -further, from a linear chain of precompiled headers to an arbitrary -directed acyclic graph (DAG) of AST files. All of the same techniques -used to make chained precompiled headers work---ID number, name -lookup, update records---are shared with modules. However, the DAG -nature of modules introduce a number of additional complications to -the model: - -<dl> - <dt>Numbering of IDs</dt> - <dd>The simple, linear numbering scheme used in chained precompiled - headers falls apart with the module DAG, because different modules - may end up with different numbering schemes for entities they - imported from common shared modules. To account for this, each - module file provides information about which modules it depends on - and which ID numbers it assigned to the entities in those modules, - as well as which ID numbers it took for its own new entities. The - AST reader then maps these "local" ID numbers into a "global" ID - number space for the current translation unit, providing a 1-1 - mapping between entities (in whatever AST file they inhabit) and - global ID numbers. If that translation unit is then serialized into - an AST file, this mapping will be stored for use when the AST file - is imported.</dd> - - <dt>Declaration merging</dt> - <dd>It is possible for a given entity (from the language's - perspective) to be declared multiple times in different places. For - example, two different headers can have the declaration of - <tt>printf</tt> or could forward-declare <tt>struct stat</tt>. 
If - each of those headers is included in a module, and some third party - imports both of those modules, there is a potentially serious - problem: name lookup for <tt>printf</tt> or <tt>struct stat</tt> will - find both declarations, but the AST nodes are unrelated. This would - result in a compilation error, due to an ambiguity in name - lookup. Therefore, the AST reader performs declaration merging - according to the appropriate language semantics, ensuring that the - two disjoint declarations are merged into a single redeclaration - chain (with a common canonical declaration), so that it is as if one - of the headers had been included before the other.</dd> - - <dt>Name Visibility</dt> - <dd>Modules allow certain names that occur during module creation to - be "hidden", so that they are not part of the public interface of - the module and are not visible to its clients. The AST reader - maintains a "visible" bit on various AST nodes (declarations, macros, - etc.) to indicate whether that particular AST node is currently - visible; the various name lookup mechanisms in Clang inspect the - visible bit to determine whether that entity, which is still in the - AST (because other, visible AST nodes may depend on it), can - actually be found by name lookup. When a new (sub)module is - imported, it may make existing, non-visible, already-deserialized - AST nodes visible; it is the responsibility of the AST reader to - find and update these AST nodes when it is notified of the import.</dd> - -</dl> - -</div> - -</body> -</html> diff --git a/docs/PCHInternals.rst b/docs/PCHInternals.rst new file mode 100644 index 0000000..a36e65c --- /dev/null +++ b/docs/PCHInternals.rst @@ -0,0 +1,561 @@ +======================================== +Precompiled Header and Modules Internals +======================================== + +.. contents:: + :local: + +This document describes the design and implementation of Clang's precompiled +headers (PCH) and modules. 
If you are interested in the end-user view, please +see the :ref:`User's Manual <usersmanual-precompiled-headers>`. + +Using Precompiled Headers with ``clang`` +---------------------------------------- + +The Clang compiler frontend, ``clang -cc1``, supports two command line options +for generating and using PCH files. + +To generate PCH files using ``clang -cc1``, use the option :option:`-emit-pch`: + +.. code-block:: bash + + $ clang -cc1 test.h -emit-pch -o test.h.pch + +This option is transparently used by ``clang`` when generating PCH files. The +resulting PCH file contains the serialized form of the compiler's internal +representation after it has completed parsing and semantic analysis. The PCH +file can then be used as a prefix header with the :option:`-include-pch` +option: + +.. code-block:: bash + + $ clang -cc1 -include-pch test.h.pch test.c -o test.s + +Design Philosophy +----------------- + +Precompiled headers are meant to improve overall compile times for projects, so +the design of precompiled headers is entirely driven by performance concerns. +The use case for precompiled headers is relatively simple: when there is a +common set of headers that is included in nearly every source file in the +project, we *precompile* that bundle of headers into a single precompiled +header (PCH file). Then, when compiling the source files in the project, we +load the PCH file first (as a prefix header), which acts as a stand-in for that +bundle of headers. + +A precompiled header implementation improves performance when: + +* Loading the PCH file is significantly faster than re-parsing the bundle of + headers stored within the PCH file. Thus, a precompiled header design + attempts to minimize the cost of reading the PCH file. Ideally, this cost + should not vary with the size of the precompiled header file. 
+ +* The cost of generating the PCH file initially is not so large that it + counters the per-source-file performance improvement due to eliminating the + need to parse the bundled headers in the first place. This is particularly + important on multi-core systems, because PCH file generation serializes the + build when all compilations require the PCH file to be up-to-date. + +Modules, as implemented in Clang, use the same mechanisms as precompiled +headers to save a serialized AST file (one per module) and use those AST +modules. From an implementation standpoint, modules are a generalization of +precompiled headers, lifting a number of restrictions placed on precompiled +headers. In particular, there can only be one precompiled header and it must +be included at the beginning of the translation unit. The extensions to the +AST file format required for modules are discussed in the section on +:ref:`modules <pchinternals-modules>`. + +Clang's AST files are designed with a compact on-disk representation, which +minimizes both creation time and the time required to initially load the AST +file. The AST file itself contains a serialized representation of Clang's +abstract syntax trees and supporting data structures, stored using the same +compressed bitstream as `LLVM's bitcode file format +<http://llvm.org/docs/BitCodeFormat.html>`_. + +Clang's AST files are loaded "lazily" from disk. When an AST file is initially +loaded, Clang reads only a small amount of data from the AST file to establish +where certain important data structures are stored. The amount of data read in +this initial load is independent of the size of the AST file, such that a +larger AST file does not lead to longer AST load times. The actual header data +in the AST file --- macros, functions, variables, types, etc. --- is loaded +only when it is referenced from the user's code, at which point only that +entity (and those entities it depends on) are deserialized from the AST file. 
+With this approach, the cost of using an AST file for a translation unit is +proportional to the amount of code actually used from the AST file, rather than +being proportional to the size of the AST file itself. + +When given the :option:`-print-stats` option, Clang produces statistics +describing how much of the AST file was actually loaded from disk. For a +simple "Hello, World!" program that includes the Apple ``Cocoa.h`` header +(which is built as a precompiled header), this option illustrates how little of +the actual precompiled header is required: + +.. code-block:: none + + *** AST File Statistics: + 895/39981 source location entries read (2.238563%) + 19/15315 types read (0.124061%) + 20/82685 declarations read (0.024188%) + 154/58070 identifiers read (0.265197%) + 0/7260 selectors read (0.000000%) + 0/30842 statements read (0.000000%) + 4/8400 macros read (0.047619%) + 1/4995 lexical declcontexts read (0.020020%) + 0/4413 visible declcontexts read (0.000000%) + 0/7230 method pool entries read (0.000000%) + 0 method pool misses + +For this small program, only a tiny fraction of the source locations, types, +declarations, identifiers, and macros were actually deserialized from the +precompiled header. These statistics can be useful to determine whether the +AST file implementation can be improved by making more of the implementation +lazy. + +Precompiled headers can be chained. When you create a PCH while including an +existing PCH, Clang can create the new PCH by referencing the original file and +only writing the new data to the new file. For example, you could create a PCH +out of all the headers that are very commonly used throughout your project, and +then create a PCH for every single source file in the project that includes the +code that is specific to that file, so that recompiling the file itself is very +fast, without duplicating the data from the common headers for every file. 
The +mechanisms behind chained precompiled headers are discussed in a :ref:`later +section <pchinternals-chained>`. + +AST File Contents +----------------- + +Clang's AST files are organized into several different blocks, each of which +contains the serialized representation of a part of Clang's internal +representation. Each of the blocks corresponds to either a block or a record +within `LLVM's bitstream format <http://llvm.org/docs/BitCodeFormat.html>`_. +The contents of each of these logical blocks are described below. + +.. image:: PCHLayout.png + +For a given AST file, the `llvm-bcanalyzer +<http://llvm.org/docs/CommandGuide/llvm-bcanalyzer.html>`_ utility can be used +to examine the actual structure of the bitstream for the AST file. This +information can be used both to help understand the structure of the AST file +and to isolate areas where AST files can still be optimized, e.g., through the +introduction of abbreviations. + +Metadata Block +^^^^^^^^^^^^^^ + +The metadata block contains several records that provide information about how +the AST file was built. This metadata is primarily used to validate the use of +an AST file. For example, a precompiled header built for a 32-bit x86 target +cannot be used when compiling for a 64-bit x86 target. The metadata block +contains information about: + +Language options + Describes the particular language dialect used to compile the AST file, + including major options (e.g., Objective-C support) and more minor options + (e.g., support for "``//``" comments). The contents of this record correspond to + the ``LangOptions`` class. + +Target architecture + The target triple that describes the architecture, platform, and ABI for + which the AST file was generated, e.g., ``i386-apple-darwin9``. + +AST version + The major and minor version numbers of the AST file format. 
Changes in the + minor version number should not affect backward compatibility, while changes + in the major version number imply that a newer compiler cannot read an older + precompiled header (and vice-versa). + +Original file name + The full path of the header that was used to generate the AST file. + +Predefines buffer + Although not explicitly stored as part of the metadata, the predefines buffer + is used in the validation of the AST file. The predefines buffer itself + contains code generated by the compiler to initialize the preprocessor state + according to the current target, platform, and command-line options. For + example, the predefines buffer will contain "``#define __STDC__ 1``" when we + are compiling C without Microsoft extensions. The predefines buffer itself + is stored within the :ref:`pchinternals-sourcemgr`, but its contents are + verified along with the rest of the metadata. + +A chained PCH file (that is, one that references another PCH) and a module +(which may import other modules) have additional metadata containing the list +of all AST files that this AST file depends on. Each of those files will be +loaded along with this AST file. + +For chained precompiled headers, the language options, target architecture and +predefines buffer data is taken from the end of the chain, since they have to +match anyway. + +.. _pchinternals-sourcemgr: + +Source Manager Block +^^^^^^^^^^^^^^^^^^^^ + +The source manager block contains the serialized representation of Clang's +:ref:`SourceManager <SourceManager>` class, which handles the mapping from +source locations (as represented in Clang's abstract syntax tree) into actual +column/line positions within a source file or macro instantiation. The AST +file's representation of the source manager also includes information about all +of the headers that were (transitively) included when building the AST file. 
+ +The bulk of the source manager block is dedicated to information about the +various files, buffers, and macro instantiations into which a source location +can refer. Each of these is referenced by a numeric "file ID", which is a +unique number (allocated starting at 1) stored in the source location. Clang +serializes the information for each kind of file ID, along with an index that +maps file IDs to the position within the AST file where the information about +that file ID is stored. The data associated with a file ID is loaded only when +required by the front end, e.g., to emit a diagnostic that includes a macro +instantiation history inside the header itself. + +The source manager block also contains information about all of the headers +that were included when building the AST file. This includes information about +the controlling macro for the header (e.g., when the preprocessor identified +that the contents of the header depend on a macro like +``LLVM_CLANG_SOURCEMANAGER_H``). + +.. _pchinternals-preprocessor: + +Preprocessor Block +^^^^^^^^^^^^^^^^^^ + +The preprocessor block contains the serialized representation of the +preprocessor. Specifically, it contains all of the macros that have been +defined by the end of the header used to build the AST file, along with the +token sequences that comprise each macro. The macro definitions are only read +from the AST file when the name of the macro first occurs in the program. This +lazy loading of macro definitions is triggered by lookups into the +:ref:`identifier table <pchinternals-ident-table>`. + +.. _pchinternals-types: + +Types Block +^^^^^^^^^^^ + +The types block contains the serialized representation of all of the types +referenced in the translation unit. Each Clang type node (``PointerType``, +``FunctionProtoType``, etc.) has a corresponding record type in the AST file. 
+When types are deserialized from the AST file, the data within the record is +used to reconstruct the appropriate type node using the AST context. + +Each type has a unique type ID, which is an integer that uniquely identifies +that type. Type ID 0 represents the NULL type, type IDs less than +``NUM_PREDEF_TYPE_IDS`` represent predefined types (``void``, ``float``, etc.), +while other "user-defined" type IDs are assigned consecutively from +``NUM_PREDEF_TYPE_IDS`` upward as the types are encountered. The AST file has +an associated mapping from the user-defined types block to the location within +the types block where the serialized representation of that type resides, +enabling lazy deserialization of types. When a type is referenced from within +the AST file, that reference is encoded using the type ID shifted left by 3 +bits. The lower three bits are used to represent the ``const``, ``volatile``, +and ``restrict`` qualifiers, as in Clang's :ref:`QualType <QualType>` class. + +.. _pchinternals-decls: + +Declarations Block +^^^^^^^^^^^^^^^^^^ + +The declarations block contains the serialized representation of all of the +declarations referenced in the translation unit. Each Clang declaration node +(``VarDecl``, ``FunctionDecl``, etc.) has a corresponding record type in the +AST file. When declarations are deserialized from the AST file, the data +within the record is used to build and populate a new instance of the +corresponding ``Decl`` node. As with types, each declaration node has a +numeric ID that is used to refer to that declaration within the AST file. In +addition, a lookup table provides a mapping from that numeric ID to the offset +within the precompiled header where that declaration is described. + +Declarations in Clang's abstract syntax trees are stored hierarchically. 
At +the top of the hierarchy is the translation unit (``TranslationUnitDecl``), +which contains all of the declarations in the translation unit but is not +actually written as a specific declaration node. Its child declarations (such +as functions or struct types) may also contain other declarations inside them, +and so on. Within Clang, each declaration is stored within a :ref:`declaration +context <DeclContext>`, as represented by the ``DeclContext`` class. +Declaration contexts provide the mechanism to perform name lookup within a +given declaration (e.g., find the member named ``x`` in a structure) and +iterate over the declarations stored within a context (e.g., iterate over all +of the fields of a structure for structure layout). + +In Clang's AST file format, deserializing a declaration that is a +``DeclContext`` is a separate operation from deserializing all of the +declarations stored within that declaration context. Therefore, Clang will +deserialize the translation unit declaration without deserializing the +declarations within that translation unit. When required, the declarations +stored within a declaration context will be deserialized. There are two +representations of the declarations within a declaration context, which +correspond to the name-lookup and iteration behavior described above: + +* When the front end performs name lookup to find a name ``x`` within a given + declaration context (for example, during semantic analysis of the expression + ``p->x``, where ``p``'s type is defined in the precompiled header), Clang + refers to an on-disk hash table that maps from the names within that + declaration context to the declaration IDs that represent each visible + declaration with that name. The actual declarations will then be + deserialized to provide the results of name lookup. +* When the front end performs iteration over all of the declarations within a + declaration context, all of those declarations are immediately + de-serialized. 
For large declaration contexts (e.g., the translation unit), + this operation is expensive; however, large declaration contexts are not + traversed in normal compilation, since such a traversal is unnecessary. + However, it is common for the code generator and semantic analysis to + traverse declaration contexts for structs, classes, unions, and + enumerations, although those contexts contain relatively few declarations in + the common case. + +Statements and Expressions +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Statements and expressions are stored in the AST file in both the :ref:`types +<pchinternals-types>` and the :ref:`declarations <pchinternals-decls>` blocks, +because every statement or expression will be associated with either a type or +declaration. The actual statement and expression records are stored +immediately following the declaration or type that owns the statement or +expression. For example, the statement representing the body of a function +will be stored directly following the declaration of the function. + +As with types and declarations, each statement and expression kind in Clang's +abstract syntax tree (``ForStmt``, ``CallExpr``, etc.) has a corresponding +record type in the AST file, which contains the serialized representation of +that statement or expression. Each substatement or subexpression within an +expression is stored as a separate record (which keeps most records to a fixed +size). Within the AST file, the subexpressions of an expression are stored, in +reverse order, prior to the expression that owns those expressions, using a form +of `Reverse Polish Notation +<http://en.wikipedia.org/wiki/Reverse_Polish_notation>`_. 
For example, an +expression ``3 - 4 + 5`` would be represented as follows: + ++------------------------+ +| ``IntegerLiteral(5)``  | ++------------------------+ +| ``IntegerLiteral(4)``  | ++------------------------+ +| ``IntegerLiteral(3)``  | ++------------------------+ +| ``BinaryOperator(-)``  | ++------------------------+ +| ``BinaryOperator(+)``  | ++------------------------+ +| ``STOP``               | ++------------------------+ + +When reading this representation, Clang evaluates each expression record it +encounters, builds the appropriate abstract syntax tree node, and then pushes +that expression on to a stack. When a record contains *N* subexpressions --- +``BinaryOperator`` has two of them --- those expressions are popped from the +top of the stack. The special STOP code indicates that we have reached the end +of a serialized expression or statement; other expression or statement records +may follow, but they are part of a different expression. + +.. _pchinternals-ident-table: + +Identifier Table Block +^^^^^^^^^^^^^^^^^^^^^^ + +The identifier table block contains an on-disk hash table that maps each +identifier mentioned within the AST file to the serialized representation of +the identifier's information (e.g., the ``IdentifierInfo`` structure). The +serialized representation contains: + +* The actual identifier string. +* Flags that describe whether this identifier is the name of a built-in, a + poisoned identifier, an extension token, or a macro. +* If the identifier names a macro, the offset of the macro definition within + the :ref:`pchinternals-preprocessor`. +* If the identifier names one or more declarations visible from translation + unit scope, the :ref:`declaration IDs <pchinternals-decls>` of these + declarations. + +When an AST file is loaded, the AST file reader mechanism introduces itself +into the identifier table as an external lookup source. 
Thus, when the user +program refers to an identifier that has not yet been seen, Clang will perform +a lookup into the identifier table. If an identifier is found, its contents +(macro definitions, flags, top-level declarations, etc.) will be deserialized, +at which point the corresponding ``IdentifierInfo`` structure will have the +same contents it would have after parsing the headers in the AST file. + +Within the AST file, the identifiers used to name declarations are represented +with an integral value. A separate table provides a mapping from this integral +value (the identifier ID) to the location within the on-disk hash table where +that identifier is stored. This mapping is used when deserializing the name of +a declaration, the identifier of a token, or any other construct in the AST +file that refers to a name. + +.. _pchinternals-method-pool: + +Method Pool Block +^^^^^^^^^^^^^^^^^ + +The method pool block is represented as an on-disk hash table that serves two +purposes: it provides a mapping from the names of Objective-C selectors to the +set of Objective-C instance and class methods that have that particular +selector (which is required for semantic analysis in Objective-C) and also +stores all of the selectors used by entities within the AST file. The design +of the method pool is similar to that of the :ref:`identifier table +<pchinternals-ident-table>`: the first time a particular selector is formed +during the compilation of the program, Clang will search in the on-disk hash +table of selectors; if found, Clang will read the Objective-C methods +associated with that selector into the appropriate front-end data structure +(``Sema::InstanceMethodPool`` and ``Sema::FactoryMethodPool`` for instance and +class methods, respectively). + +As with identifiers, selectors are represented by numeric values within the AST +file. 
A separate index maps these numeric selector values to the offset of the +selector within the on-disk hash table, and will be used when de-serializing an +Objective-C method declaration (or other Objective-C construct) that refers to +the selector. + +AST Reader Integration Points +----------------------------- + +The "lazy" deserialization behavior of AST files requires their integration +into several completely different submodules of Clang. For example, lazily +deserializing the declarations during name lookup requires that the name-lookup +routines be able to query the AST file to find entities stored there. + +For each Clang data structure that requires direct interaction with the AST +reader logic, there is an abstract class that provides the interface between +the two modules. The ``ASTReader`` class, which handles the loading of an AST +file, inherits from all of these abstract classes to provide lazy +deserialization of Clang's data structures. ``ASTReader`` implements the +following abstract classes: + +``ExternalSLocEntrySource`` + This abstract interface is associated with the ``SourceManager`` class, and + is used whenever the :ref:`source manager <pchinternals-sourcemgr>` needs to + load the details of a file, buffer, or macro instantiation. + +``IdentifierInfoLookup`` + This abstract interface is associated with the ``IdentifierTable`` class, and + is used whenever the program source refers to an identifier that has not yet + been seen. In this case, the AST reader searches for this identifier within + its :ref:`identifier table <pchinternals-ident-table>` to load any top-level + declarations or macros associated with that identifier. + +``ExternalASTSource`` + This abstract interface is associated with the ``ASTContext`` class, and is + used whenever the abstract syntax tree nodes need to be loaded from the AST + file. 
It provides the ability to de-serialize declarations and types + identified by their numeric values, read the bodies of functions when + required, and read the declarations stored within a declaration context + (either for iteration or for name lookup). + +``ExternalSemaSource`` + This abstract interface is associated with the ``Sema`` class, and is used + whenever semantic analysis needs to read information from the :ref:`global + method pool <pchinternals-method-pool>`. + +.. _pchinternals-chained: + +Chained precompiled headers +--------------------------- + +Chained precompiled headers were initially intended to improve the performance +of IDE-centric operations such as syntax highlighting and code completion while +a particular source file is being edited by the user. To minimize the amount +of reparsing required after a change to the file, a form of precompiled header +--- called a precompiled *preamble* --- is automatically generated by parsing +all of the headers in the source file, up to and including the last +``#include``. When only the source file changes (and none of the headers it +depends on), reparsing of that source file can use the precompiled preamble and +start parsing after the ``#include``\ s, so parsing time is proportional to the +size of the source file (rather than all of its includes). However, the +compilation of that translation unit may already use a precompiled header: in +this case, Clang will create the precompiled preamble as a chained precompiled +header that refers to the original precompiled header. This drastically +reduces the time needed to serialize the precompiled preamble for use in +reparsing. + +Chained precompiled headers get their name because each precompiled header can +depend on one other precompiled header, forming a chain of dependencies. A +translation unit will then include the precompiled header that starts the chain +(i.e., nothing depends on it). 
This linearity of dependencies is important for +the semantic model of chained precompiled headers, because the most-recent +precompiled header can provide information that overrides the information +provided by the precompiled headers it depends on, just like a header file +``B.h`` that includes another header ``A.h`` can modify the state produced by +parsing ``A.h``, e.g., by ``#undef``'ing a macro defined in ``A.h``. + +There are several ways in which chained precompiled headers generalize the AST +file model: + +Numbering of IDs + Many different kinds of entities --- identifiers, declarations, types, etc. + --- have ID numbers that start at 1 or some other predefined constant and + grow upward. Each precompiled header records the maximum ID number it has + assigned in each category. Then, when a new precompiled header is generated + that depends on (chains to) another precompiled header, it will start + counting at the next available ID number. This way, one can determine, given + an ID number, which AST file actually contains the entity. + +Name lookup + When writing a chained precompiled header, Clang attempts to write only + information that has changed from the precompiled header on which it is + based. This changes the lookup algorithm for the various tables, such as the + :ref:`identifier table <pchinternals-ident-table>`: the search starts at the + most-recent precompiled header. If no entry is found, lookup then proceeds + to the identifier table in the precompiled header it depends on, and so on. + Once a lookup succeeds, that result is considered definitive, overriding any + results from earlier precompiled headers. + +Update records + There are various ways in which a later precompiled header can modify the + entities described in an earlier precompiled header. For example, later + precompiled headers can add entries into the various name-lookup tables for + the translation unit or namespaces, or add new categories to an Objective-C + class. 
Each of these updates is captured in an "update record" that is + stored in the chained precompiled header file and will be loaded along with + the original entity. + +.. _pchinternals-modules: + +Modules +------- + +Modules generalize the chained precompiled header model yet further, from a +linear chain of precompiled headers to an arbitrary directed acyclic graph +(DAG) of AST files. All of the same techniques used to make chained +precompiled headers work --- ID number, name lookup, update records --- are +shared with modules. However, the DAG nature of modules introduces a number of +additional complications to the model: + +Numbering of IDs + The simple, linear numbering scheme used in chained precompiled headers falls + apart with the module DAG, because different modules may end up with + different numbering schemes for entities they imported from common shared + modules. To account for this, each module file provides information about + which modules it depends on and which ID numbers it assigned to the entities + in those modules, as well as which ID numbers it took for its own new + entities. The AST reader then maps these "local" ID numbers into a "global" + ID number space for the current translation unit, providing a 1-1 mapping + between entities (in whatever AST file they inhabit) and global ID numbers. + If that translation unit is then serialized into an AST file, this mapping + will be stored for use when the AST file is imported. + +Declaration merging + It is possible for a given entity (from the language's perspective) to be + declared multiple times in different places. For example, two different + headers can have the declaration of ``printf`` or could forward-declare + ``struct stat``. If each of those headers is included in a module, and some + third party imports both of those modules, there is a potentially serious + problem: name lookup for ``printf`` or ``struct stat`` will find both + declarations, but the AST nodes are unrelated. 
This would result in a + compilation error, due to an ambiguity in name lookup. Therefore, the AST + reader performs declaration merging according to the appropriate language + semantics, ensuring that the two disjoint declarations are merged into a + single redeclaration chain (with a common canonical declaration), so that it + is as if one of the headers had been included before the other. + +Name Visibility + Modules allow certain names that occur during module creation to be "hidden", + so that they are not part of the public interface of the module and are not + visible to its clients. The AST reader maintains a "visible" bit on various + AST nodes (declarations, macros, etc.) to indicate whether that particular + AST node is currently visible; the various name lookup mechanisms in Clang + inspect the visible bit to determine whether that entity, which is still in + the AST (because other, visible AST nodes may depend on it), can actually be + found by name lookup. When a new (sub)module is imported, it may make + existing, non-visible, already-deserialized AST nodes visible; it is the + responsibility of the AST reader to find and update these AST nodes when it + is notified of the import. 
+ diff --git a/docs/PTHInternals.html b/docs/PTHInternals.html deleted file mode 100644 index b15f681..0000000 --- a/docs/PTHInternals.html +++ /dev/null @@ -1,179 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> - <head> - <title>Pretokenized Headers (PTH)</title> - <link type="text/css" rel="stylesheet" href="../menu.css"> - <link type="text/css" rel="stylesheet" href="../content.css"> - <style type="text/css"> - td { - vertical-align: top; - } - </style> -</head> -<body> - -<!--#include virtual="../menu.html.incl"--> - -<div id="content"> - -<h1>Pretokenized Headers (PTH)</h1> - -<p>This document first describes the low-level -interface for using PTH and then briefly elaborates on its design and -implementation. If you are interested in the end-user view, please see the -<a href="UsersManual.html#precompiledheaders">User's Manual</a>.</p> - - -<h2>Using Pretokenized Headers with <tt>clang</tt> (Low-level Interface)</h2> - -<p>The Clang compiler frontend, <tt>clang -cc1</tt>, supports three command line -options for generating and using PTH files.<p> - -<p>To generate PTH files using <tt>clang -cc1</tt>, use the option -<b><tt>-emit-pth</tt></b>: - -<pre> $ clang -cc1 test.h -emit-pth -o test.h.pth </pre> - -<p>This option is transparently used by <tt>clang</tt> when generating PTH -files. Similarly, PTH files can be used as prefix headers using the -<b><tt>-include-pth</tt></b> option:</p> - -<pre> - $ clang -cc1 -include-pth test.h.pth test.c -o test.s -</pre> - -<p>Alternatively, Clang's PTH files can be used as a raw "token-cache" -(or "content" cache) of the source included by the original header -file. This means that the contents of the PTH file are searched as substitutes -for <em>any</em> source files that are used by <tt>clang -cc1</tt> to process a -source file. 
This is done by specifying the <b><tt>-token-cache</tt></b> -option:</p> - -<pre> - $ cat test.h - #include <stdio.h> - $ clang -cc1 -emit-pth test.h -o test.h.pth - $ cat test.c - #include "test.h" - $ clang -cc1 test.c -o test -token-cache test.h.pth -</pre> - -<p>In this example the contents of <tt>stdio.h</tt> (and the files it includes) -will be retrieved from <tt>test.h.pth</tt>, as the PTH file is being used in -this case as a raw cache of the contents of <tt>test.h</tt>. This is a low-level -interface used to both implement the high-level PTH interface as well as to -provide alternative means to use PTH-style caching.</p> - -<h2>PTH Design and Implementation</h2> - -<p>Unlike GCC's precompiled headers, which cache the full ASTs and preprocessor -state of a header file, Clang's pretokenized header files mainly cache the raw -lexer <em>tokens</em> that are needed to segment the stream of characters in a -source file into keywords, identifiers, and operators. Consequently, PTH serves -to mainly directly speed up the lexing and preprocessing of a source file, while -parsing and type-checking must be completely redone every time a PTH file is -used.</p> - -<h3>Basic Design Tradeoffs</h3> - -<p>In the long term there are plans to provide an alternate PCH implementation -for Clang that also caches the work for parsing and type checking the contents -of header files. The current implementation of PCH in Clang as pretokenized -header files was motivated by the following factors:<p> - -<ul> - -<li><p><b>Language independence</b>: PTH files work with any language that -Clang's lexer can handle, including C, Objective-C, and (in the early stages) -C++. This means development on language features at the parsing level or above -(which is basically almost all interesting pieces) does not require PTH to be -modified.</p></li> - -<li><b>Simple design</b>: Relatively speaking, PTH has a simple design and -implementation, making it easy to test. 
Further, because the machinery for PTH -resides at the lower-levels of the Clang library stack it is fairly -straightforward to profile and optimize.</li> -</ul> - -<p>Further, compared to GCC's PCH implementation (which is the dominate -precompiled header file implementation that Clang can be directly compared -against) the PTH design in Clang yields several attractive features:</p> - -<ul> - -<li><p><b>Architecture independence</b>: In contrast to GCC's PCH files (and -those of several other compilers), Clang's PTH files are architecture -independent, requiring only a single PTH file when building an program for -multiple architectures.</p> - -<p>For example, on Mac OS X one may wish to -compile a "universal binary" that runs on PowerPC, 32-bit Intel -(i386), and 64-bit Intel architectures. In contrast, GCC requires a PCH file for -each architecture, as the definitions of types in the AST are -architecture-specific. Since a Clang PTH file essentially represents a lexical -cache of header files, a single PTH file can be safely used when compiling for -multiple architectures. This can also reduce compile times because only a single -PTH file needs to be generated during a build instead of several.</p></li> - -<li><p><b>Reduced memory pressure</b>: Similar to GCC, -Clang reads PTH files via the use of memory mapping (i.e., <tt>mmap</tt>). -Clang, however, memory maps PTH files as read-only, meaning that multiple -invocations of <tt>clang -cc1</tt> can share the same pages in memory from a -memory-mapped PTH file. In comparison, GCC also memory maps its PCH files but -also modifies those pages in memory, incurring the copy-on-write costs. The -read-only nature of PTH can greatly reduce memory pressure for builds involving -multiple cores, thus improving overall scalability.</p></li> - -<li><p><b>Fast generation</b>: PTH files can be generated in a small fraction -of the time needed to generate GCC's PCH files. 
Since PTH/PCH generation is a -serial operation that typically blocks progress during a build, faster -generation time leads to improved processor utilization with parallel builds on -multicore machines.</p></li> - -</ul> - -<p>Despite these strengths, PTH's simple design suffers some algorithmic -handicaps compared to other PCH strategies such as those used by GCC. While PTH -can greatly speed up the processing time of a header file, the amount of work -required to process a header file is still roughly linear in the size of the -header file. In contrast, the amount of work done by GCC to process a -precompiled header is (theoretically) constant (the ASTs for the header are -literally memory mapped into the compiler). This means that only the pieces of -the header file that are referenced by the source file including the header are -the only ones the compiler needs to process during actual compilation. While -GCC's particular implementation of PCH mitigates some of these algorithmic -strengths via the use of copy-on-write pages, the approach itself can -fundamentally dominate at an algorithmic level, especially when one considers -header files of arbitrary size.</p> - -<p>There are plans to potentially implement an complementary PCH implementation -for Clang based on the lazy deserialization of ASTs. 
This approach would -theoretically have the same constant-time algorithmic advantages just mentioned -but would also retain some of the strengths of PTH such as reduced memory -pressure (ideal for multi-core builds).</p> - -<h3>Internal PTH Optimizations</h3> - -<p>While the main optimization employed by PTH is to reduce lexing time of -header files by caching pre-lexed tokens, PTH also employs several other -optimizations to speed up the processing of header files:</p> - -<ul> - -<li><p><em><tt>stat</tt> caching</em>: PTH files cache information obtained via -calls to <tt>stat</tt> that <tt>clang -cc1</tt> uses to resolve which files are -included by <tt>#include</tt> directives. This greatly reduces the overhead -involved in context-switching to the kernel to resolve included files.</p></li> - -<li><p><em>Fasting skipping of <tt>#ifdef</tt>...<tt>#endif</tt> chains</em>: -PTH files record the basic structure of nested preprocessor blocks. When the -condition of the preprocessor block is false, all of its tokens are immediately -skipped instead of requiring them to be handled by Clang's -preprocessor.</p></li> - -</ul> - -</div> -</body> -</html> diff --git a/docs/PTHInternals.rst b/docs/PTHInternals.rst new file mode 100644 index 0000000..10dda61 --- /dev/null +++ b/docs/PTHInternals.rst @@ -0,0 +1,163 @@ +========================== +Pretokenized Headers (PTH) +========================== + +This document first describes the low-level interface for using PTH and +then briefly elaborates on its design and implementation. If you are +interested in the end-user view, please see the :ref:`User's Manual +<usersmanual-precompiled-headers>`. + +Using Pretokenized Headers with ``clang`` (Low-level Interface) +=============================================================== + +The Clang compiler frontend, ``clang -cc1``, supports three command line +options for generating and using PTH files. + +To generate PTH files using ``clang -cc1``, use the option ``-emit-pth``: + +.. 
code-block:: console + + $ clang -cc1 test.h -emit-pth -o test.h.pth + +This option is transparently used by ``clang`` when generating PTH +files. Similarly, PTH files can be used as prefix headers using the +``-include-pth`` option: + +.. code-block:: console + + $ clang -cc1 -include-pth test.h.pth test.c -o test.s + +Alternatively, Clang's PTH files can be used as a raw "token-cache" (or +"content" cache) of the source included by the original header file. +This means that the contents of the PTH file are searched as substitutes +for *any* source files that are used by ``clang -cc1`` to process a +source file. This is done by specifying the ``-token-cache`` option: + +.. code-block:: console + + $ cat test.h + #include <stdio.h> + $ clang -cc1 -emit-pth test.h -o test.h.pth + $ cat test.c + #include "test.h" + $ clang -cc1 test.c -o test -token-cache test.h.pth + +In this example the contents of ``stdio.h`` (and the files it includes) +will be retrieved from ``test.h.pth``, as the PTH file is being used in +this case as a raw cache of the contents of ``test.h``. This is a +low-level interface used to both implement the high-level PTH interface +as well as to provide alternative means to use PTH-style caching. + +PTH Design and Implementation +============================= + +Unlike GCC's precompiled headers, which cache the full ASTs and +preprocessor state of a header file, Clang's pretokenized header files +mainly cache the raw lexer *tokens* that are needed to segment the +stream of characters in a source file into keywords, identifiers, and +operators. Consequently, PTH serves to mainly directly speed up the +lexing and preprocessing of a source file, while parsing and +type-checking must be completely redone every time a PTH file is used. 
+ +Basic Design Tradeoffs +---------------------- + +In the long term there are plans to provide an alternate PCH +implementation for Clang that also caches the work for parsing and type +checking the contents of header files. The current implementation of PCH +in Clang as pretokenized header files was motivated by the following +factors: + +**Language independence** + PTH files work with any language that + Clang's lexer can handle, including C, Objective-C, and (in the early + stages) C++. This means development on language features at the + parsing level or above (which is basically almost all interesting + pieces) does not require PTH to be modified. + +**Simple design** + Relatively speaking, PTH has a simple design and + implementation, making it easy to test. Further, because the + machinery for PTH resides at the lower levels of the Clang library + stack it is fairly straightforward to profile and optimize. + +Further, compared to GCC's PCH implementation (which is the dominant +precompiled header file implementation that Clang can be directly +compared against) the PTH design in Clang yields several attractive +features: + +**Architecture independence** + In contrast to GCC's PCH files (and + those of several other compilers), Clang's PTH files are architecture + independent, requiring only a single PTH file when building a + program for multiple architectures. + + For example, on Mac OS X one may wish to compile a "universal binary" + that runs on PowerPC, 32-bit Intel (i386), and 64-bit Intel + architectures. In contrast, GCC requires a PCH file for each + architecture, as the definitions of types in the AST are + architecture-specific. Since a Clang PTH file essentially represents + a lexical cache of header files, a single PTH file can be safely used + when compiling for multiple architectures. This can also reduce + compile times because only a single PTH file needs to be generated + during a build instead of several. 
+ +**Reduced memory pressure** + Similar to GCC, Clang reads PTH files + via the use of memory mapping (i.e., ``mmap``). Clang, however, + memory maps PTH files as read-only, meaning that multiple invocations + of ``clang -cc1`` can share the same pages in memory from a + memory-mapped PTH file. In comparison, GCC also memory maps its PCH + files but also modifies those pages in memory, incurring the + copy-on-write costs. The read-only nature of PTH can greatly reduce + memory pressure for builds involving multiple cores, thus improving + overall scalability. + +**Fast generation** + PTH files can be generated in a small fraction + of the time needed to generate GCC's PCH files. Since PTH/PCH + generation is a serial operation that typically blocks progress + during a build, faster generation time leads to improved processor + utilization with parallel builds on multicore machines. + +Despite these strengths, PTH's simple design suffers some algorithmic +handicaps compared to other PCH strategies such as those used by GCC. +While PTH can greatly speed up the processing time of a header file, the +amount of work required to process a header file is still roughly linear +in the size of the header file. In contrast, the amount of work done by +GCC to process a precompiled header is (theoretically) constant (the +ASTs for the header are literally memory mapped into the compiler). This +means that the pieces of the header file that are referenced by the +source file including the header are the only ones the compiler needs to +process during actual compilation. While GCC's particular implementation +of PCH mitigates some of these algorithmic strengths via the use of +copy-on-write pages, the approach itself can fundamentally dominate at +an algorithmic level, especially when one considers header files of +arbitrary size. + +There are plans to potentially implement a complementary PCH +implementation for Clang based on the lazy deserialization of ASTs. 
This +approach would theoretically have the same constant-time algorithmic +advantages just mentioned but would also retain some of the strengths of +PTH such as reduced memory pressure (ideal for multi-core builds). + +Internal PTH Optimizations +-------------------------- + +While the main optimization employed by PTH is to reduce lexing time of +header files by caching pre-lexed tokens, PTH also employs several other +optimizations to speed up the processing of header files: + +- ``stat`` caching: PTH files cache information obtained via calls to + ``stat`` that ``clang -cc1`` uses to resolve which files are included + by ``#include`` directives. This greatly reduces the overhead + involved in context-switching to the kernel to resolve included + files. + +- Fast skipping of ``#ifdef`` ... ``#endif`` chains: PTH files + record the basic structure of nested preprocessor blocks. When the + condition of the preprocessor block is false, all of its tokens are + immediately skipped instead of requiring them to be handled by + Clang's preprocessor. 
+ + diff --git a/docs/RAVFrontendAction.html b/docs/RAVFrontendAction.html deleted file mode 100644 index b30cd57..0000000 --- a/docs/RAVFrontendAction.html +++ /dev/null @@ -1,224 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> -<title>How to write RecursiveASTVisitor based ASTFrontendActions.</title> -<link type="text/css" rel="stylesheet" href="../menu.css"> -<link type="text/css" rel="stylesheet" href="../content.css"> -</head> -<body> - -<!--#include virtual="../menu.html.incl"--> - -<div id="content"> - -<h1>How to write RecursiveASTVisitor based ASTFrontendActions.</h1> - -<!-- ======================================================================= --> -<h2 id="intro">Introduction</h2> -<!-- ======================================================================= --> - -In this tutorial you will learn how to create a FrontendAction that uses -a RecursiveASTVisitor to find CXXRecordDecl AST nodes with a specified name. - -<!-- ======================================================================= --> -<h2 id="action">Creating a FrontendAction</h2> -<!-- ======================================================================= --> - -<p>When writing a clang based tool like a Clang Plugin or a standalone tool -based on LibTooling, the common entry point is the FrontendAction. -FrontendAction is an interface that allows execution of user specific actions -as part of the compilation. To run tools over the AST clang provides the -convenience interface ASTFrontendAction, which takes care of executing the -action. 
The only part left is to implement the CreateASTConsumer method that -returns an ASTConsumer per translation unit.</p> -<pre> - class FindNamedClassAction : public clang::ASTFrontendAction { - public: - virtual clang::ASTConsumer *CreateASTConsumer( - clang::CompilerInstance &Compiler, llvm::StringRef InFile) { - return new FindNamedClassConsumer; - } - }; -</pre> - -<!-- ======================================================================= --> -<h2 id="consumer">Creating an ASTConsumer</h2> -<!-- ======================================================================= --> - -<p>ASTConsumer is an interface used to write generic actions on an AST, -regardless of how the AST was produced. ASTConsumer provides many different -entry points, but for our use case the only one needed is HandleTranslationUnit, -which is called with the ASTContext for the translation unit.</p> -<pre> - class FindNamedClassConsumer : public clang::ASTConsumer { - public: - virtual void HandleTranslationUnit(clang::ASTContext &Context) { - // Traversing the translation unit decl via a RecursiveASTVisitor - // will visit all nodes in the AST. - Visitor.TraverseDecl(Context.getTranslationUnitDecl()); - } - private: - // A RecursiveASTVisitor implementation. - FindNamedClassVisitor Visitor; - }; -</pre> - -<!-- ======================================================================= --> -<h2 id="rav">Using the RecursiveASTVisitor</h2> -<!-- ======================================================================= --> - -<p>Now that everything is hooked up, the next step is to implement a -RecursiveASTVisitor to extract the relevant information from the AST.</p> -<p>The RecursiveASTVisitor provides hooks of the form -bool VisitNodeType(NodeType *) for most AST nodes; the exception are TypeLoc -nodes, which are passed by-value. We only need to implement the methods for the -relevant node types. -</p> -<p>Let's start by writing a RecursiveASTVisitor that visits all CXXRecordDecl's. 
-<pre> - class FindNamedClassVisitor - : public RecursiveASTVisitor<FindNamedClassVisitor> { - public: - bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) { - // For debugging, dumping the AST nodes will show which nodes are already - // being visited. - Declaration->dump(); - - // The return value indicates whether we want the visitation to proceed. - // Return false to stop the traversal of the AST. - return true; - } - }; -</pre> -</p> -<p>In the methods of our RecursiveASTVisitor we can now use the full power of -the Clang AST to drill through to the parts that are interesting for us. For -example, to find all class declaration with a certain name, we can check for a -specific qualified name: -<pre> - bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) { - if (Declaration->getQualifiedNameAsString() == "n::m::C") - Declaration->dump(); - return true; - } -</pre> -</p> - -<!-- ======================================================================= --> -<h2 id="context">Accessing the SourceManager and ASTContext</h2> -<!-- ======================================================================= --> - -<p>Some of the information about the AST, like source locations and global -identifier information, are not stored in the AST nodes themselves, but in -the ASTContext and its associated source manager. To retrieve them we need to -hand the ASTContext into our RecursiveASTVisitor implementation.</p> -<p>The ASTContext is available from the CompilerInstance during the call -to CreateASTConsumer. 
We can thus extract it there and hand it into our -freshly created FindNamedClassConsumer:</p> -<pre> - virtual clang::ASTConsumer *CreateASTConsumer( - clang::CompilerInstance &Compiler, llvm::StringRef InFile) { - return new FindNamedClassConsumer(<b>&Compiler.getASTContext()</b>); - } -</pre> - -<p>Now that the ASTContext is available in the RecursiveASTVisitor, we can do -more interesting things with AST nodes, like looking up their source -locations:</p> -<pre> - bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) { - if (Declaration->getQualifiedNameAsString() == "n::m::C") { - // getFullLoc uses the ASTContext's SourceManager to resolve the source - // location and break it up into its line and column parts. - FullSourceLoc FullLocation = Context->getFullLoc(Declaration->getLocStart()); - if (FullLocation.isValid()) - llvm::outs() << "Found declaration at " - << FullLocation.getSpellingLineNumber() << ":" - << FullLocation.getSpellingColumnNumber() << "\n"; - } - return true; - } -</pre> - -<!-- ======================================================================= --> -<h2 id="full">Putting it all together</h2> -<!-- ======================================================================= --> - -<p>Now we can combine all of the above into a small example program:</p> -<pre> - #include "clang/AST/ASTConsumer.h" - #include "clang/AST/RecursiveASTVisitor.h" - #include "clang/Frontend/CompilerInstance.h" - #include "clang/Frontend/FrontendAction.h" - #include "clang/Tooling/Tooling.h" - - using namespace clang; - - class FindNamedClassVisitor - : public RecursiveASTVisitor<FindNamedClassVisitor> { - public: - explicit FindNamedClassVisitor(ASTContext *Context) - : Context(Context) {} - - bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) { - if (Declaration->getQualifiedNameAsString() == "n::m::C") { - FullSourceLoc FullLocation = Context->getFullLoc(Declaration->getLocStart()); - if (FullLocation.isValid()) - llvm::outs() << "Found declaration at " - << 
FullLocation.getSpellingLineNumber() << ":" - << FullLocation.getSpellingColumnNumber() << "\n"; - } - return true; - } - - private: - ASTContext *Context; - }; - - class FindNamedClassConsumer : public clang::ASTConsumer { - public: - explicit FindNamedClassConsumer(ASTContext *Context) - : Visitor(Context) {} - - virtual void HandleTranslationUnit(clang::ASTContext &Context) { - Visitor.TraverseDecl(Context.getTranslationUnitDecl()); - } - private: - FindNamedClassVisitor Visitor; - }; - - class FindNamedClassAction : public clang::ASTFrontendAction { - public: - virtual clang::ASTConsumer *CreateASTConsumer( - clang::CompilerInstance &Compiler, llvm::StringRef InFile) { - return new FindNamedClassConsumer(&Compiler.getASTContext()); - } - }; - - int main(int argc, char **argv) { - if (argc > 1) { - clang::tooling::runToolOnCode(new FindNamedClassAction, argv[1]); - } - } -</pre> - -<p>We store this into a file called FindClassDecls.cpp and create the following -CMakeLists.txt to link it:</p> -<pre> -set(LLVM_USED_LIBS clangTooling) - -add_clang_executable(find-class-decls FindClassDecls.cpp) -</pre> - -<p>When running this tool over a small code snippet it will output all -declarations of a class n::m::C it found:</p> -<pre> - $ ./bin/find-class-decls "namespace n { namespace m { class C {}; } }" - Found declaration at 1:29 -</pre> - -</div> -</body> -</html> - diff --git a/docs/RAVFrontendAction.rst b/docs/RAVFrontendAction.rst new file mode 100644 index 0000000..2f60ce9 --- /dev/null +++ b/docs/RAVFrontendAction.rst @@ -0,0 +1,216 @@ +========================================================== +How to write RecursiveASTVisitor based ASTFrontendActions. +========================================================== + +Introduction +============ + +In this tutorial you will learn how to create a FrontendAction that uses +a RecursiveASTVisitor to find CXXRecordDecl AST nodes with a specified +name. 
+ +Creating a FrontendAction +========================= + +When writing a clang based tool like a Clang Plugin or a standalone tool +based on LibTooling, the common entry point is the FrontendAction. +FrontendAction is an interface that allows execution of user specific +actions as part of the compilation. To run tools over the AST clang +provides the convenience interface ASTFrontendAction, which takes care +of executing the action. The only part left is to implement the +CreateASTConsumer method that returns an ASTConsumer per translation +unit. + +:: + + class FindNamedClassAction : public clang::ASTFrontendAction { + public: + virtual clang::ASTConsumer *CreateASTConsumer( + clang::CompilerInstance &Compiler, llvm::StringRef InFile) { + return new FindNamedClassConsumer; + } + }; + +Creating an ASTConsumer +======================= + +ASTConsumer is an interface used to write generic actions on an AST, +regardless of how the AST was produced. ASTConsumer provides many +different entry points, but for our use case the only one needed is +HandleTranslationUnit, which is called with the ASTContext for the +translation unit. + +:: + + class FindNamedClassConsumer : public clang::ASTConsumer { + public: + virtual void HandleTranslationUnit(clang::ASTContext &Context) { + // Traversing the translation unit decl via a RecursiveASTVisitor + // will visit all nodes in the AST. + Visitor.TraverseDecl(Context.getTranslationUnitDecl()); + } + private: + // A RecursiveASTVisitor implementation. + FindNamedClassVisitor Visitor; + }; + +Using the RecursiveASTVisitor +============================= + +Now that everything is hooked up, the next step is to implement a +RecursiveASTVisitor to extract the relevant information from the AST. + +The RecursiveASTVisitor provides hooks of the form bool +VisitNodeType(NodeType \*) for most AST nodes; the exceptions are TypeLoc +nodes, which are passed by-value. We only need to implement the methods +for the relevant node types. 
+ +Let's start by writing a RecursiveASTVisitor that visits all +CXXRecordDecl's. + +:: + + class FindNamedClassVisitor + : public RecursiveASTVisitor<FindNamedClassVisitor> { + public: + bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) { + // For debugging, dumping the AST nodes will show which nodes are already + // being visited. + Declaration->dump(); + + // The return value indicates whether we want the visitation to proceed. + // Return false to stop the traversal of the AST. + return true; + } + }; + +In the methods of our RecursiveASTVisitor we can now use the full power +of the Clang AST to drill through to the parts that are interesting for +us. For example, to find all class declarations with a certain name, we +can check for a specific qualified name: + +:: + + bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) { + if (Declaration->getQualifiedNameAsString() == "n::m::C") + Declaration->dump(); + return true; + } + +Accessing the SourceManager and ASTContext +========================================== + +Some of the information about the AST, like source locations and global +identifier information, is not stored in the AST nodes themselves, but +in the ASTContext and its associated source manager. To retrieve it we +need to hand the ASTContext into our RecursiveASTVisitor implementation. + +The ASTContext is available from the CompilerInstance during the call to +CreateASTConsumer. 
We can thus extract it there and hand it into our +freshly created FindNamedClassConsumer: + +:: + + virtual clang::ASTConsumer *CreateASTConsumer( + clang::CompilerInstance &Compiler, llvm::StringRef InFile) { + return new FindNamedClassConsumer(&Compiler.getASTContext()); + } + +Now that the ASTContext is available in the RecursiveASTVisitor, we can +do more interesting things with AST nodes, like looking up their source +locations: + +:: + + bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) { + if (Declaration->getQualifiedNameAsString() == "n::m::C") { + // getFullLoc uses the ASTContext's SourceManager to resolve the source + // location and break it up into its line and column parts. + FullSourceLoc FullLocation = Context->getFullLoc(Declaration->getLocStart()); + if (FullLocation.isValid()) + llvm::outs() << "Found declaration at " + << FullLocation.getSpellingLineNumber() << ":" + << FullLocation.getSpellingColumnNumber() << "\n"; + } + return true; + } + +Putting it all together +======================= + +Now we can combine all of the above into a small example program: + +:: + + #include "clang/AST/ASTConsumer.h" + #include "clang/AST/RecursiveASTVisitor.h" + #include "clang/Frontend/CompilerInstance.h" + #include "clang/Frontend/FrontendAction.h" + #include "clang/Tooling/Tooling.h" + + using namespace clang; + + class FindNamedClassVisitor + : public RecursiveASTVisitor<FindNamedClassVisitor> { + public: + explicit FindNamedClassVisitor(ASTContext *Context) + : Context(Context) {} + + bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) { + if (Declaration->getQualifiedNameAsString() == "n::m::C") { + FullSourceLoc FullLocation = Context->getFullLoc(Declaration->getLocStart()); + if (FullLocation.isValid()) + llvm::outs() << "Found declaration at " + << FullLocation.getSpellingLineNumber() << ":" + << FullLocation.getSpellingColumnNumber() << "\n"; + } + return true; + } + + private: + ASTContext *Context; + }; + + class FindNamedClassConsumer : 
public clang::ASTConsumer { + public: + explicit FindNamedClassConsumer(ASTContext *Context) + : Visitor(Context) {} + + virtual void HandleTranslationUnit(clang::ASTContext &Context) { + Visitor.TraverseDecl(Context.getTranslationUnitDecl()); + } + private: + FindNamedClassVisitor Visitor; + }; + + class FindNamedClassAction : public clang::ASTFrontendAction { + public: + virtual clang::ASTConsumer *CreateASTConsumer( + clang::CompilerInstance &Compiler, llvm::StringRef InFile) { + return new FindNamedClassConsumer(&Compiler.getASTContext()); + } + }; + + int main(int argc, char **argv) { + if (argc > 1) { + clang::tooling::runToolOnCode(new FindNamedClassAction, argv[1]); + } + } + +We store this into a file called FindClassDecls.cpp and create the +following CMakeLists.txt to link it: + +:: + + set(LLVM_USED_LIBS clangTooling) + + add_clang_executable(find-class-decls FindClassDecls.cpp) + +When running this tool over a small code snippet it will output all +declarations of a class n::m::C it found: + +:: + + $ ./bin/find-class-decls "namespace n { namespace m { class C {}; } }" + Found declaration at 1:29 + diff --git a/docs/README.txt b/docs/README.txt new file mode 100644 index 0000000..c4e565f --- /dev/null +++ b/docs/README.txt @@ -0,0 +1 @@ +See llvm/docs/README.txt diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html deleted file mode 100644 index a1ba15c..0000000 --- a/docs/ReleaseNotes.html +++ /dev/null @@ -1,325 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> -<title>Clang 3.2 Release Notes</title> -<link type="text/css" rel="stylesheet" href="../menu.css"> -<link type="text/css" rel="stylesheet" href="../content.css"> -<style type="text/css"> -td { - vertical-align: top; -} -</style> -</head> -<body> - -<!--#include virtual="../menu.html.incl"--> - -<div id="content"> - -<h1>Clang 3.2 Release Notes</h1> - -<img style="float:right" src="http://llvm.org/img/DragonSmall.png" - 
width="136" height="136" alt="LLVM Dragon Logo"> - -<ul> - <li><a href="#intro">Introduction</a></li> - <li><a href="#whatsnew">What's New in Clang 3.2?</a> - <ul> - <li><a href="#majorfeatures">Major New Features</a></li> - <li><a href="#newflags">New Compiler Flags</a></li> - <li><a href="#cchanges">C Language Changes</a></li> - <li><a href="#cxxchanges">C++ Language Changes</a></li> - <li><a href="#objcchanges">Objective-C Language Changes</a></li> - <li><a href="#apichanges">Internal API Changes</a></li> - <li><a href="#pythonchanges">Python Binding Changes</a></li> - </ul> - </li> - <li><a href="#knownproblems">Known Problems</a></li> - <li><a href="#additionalinfo">Additional Information</a></li> -</ul> - -<div class="doc_author"> - <p>Written by the <a href="http://llvm.org/">LLVM Team</a></p> -</div> - -<!-- ======================================================================= --> -<h2 id="intro">Introduction</h2> -<!-- ======================================================================= --> - -<p>This document contains the release notes for the Clang C/C++/Objective-C - frontend, part of the LLVM Compiler Infrastructure, release 3.2. Here we - describe the status of Clang in some detail, including major improvements - from the previous release and new feature work. For the general LLVM release - notes, see <a href="http://llvm.org/docs/ReleaseNotes.html">the LLVM - documentation</a>. All LLVM releases may be downloaded from the - <a href="http://llvm.org/releases/">LLVM releases web site</a>.</p> - -<p>For more information about Clang or LLVM, including information about the - latest release, please check out the main please see the - <a href="http://clang.llvm.org">Clang Web Site</a> or the - <a href="http://llvm.org">LLVM Web Site</a>. - -<p>Note that if you are reading this file from a Subversion checkout or the main - Clang web page, this document applies to the <i>next</i> release, not the - current one. 
To see the release notes for a specific release, please see the - <a href="http://llvm.org/releases/">releases page</a>.</p> - -<!-- ======================================================================= --> -<h2 id="whatsnew">What's New in Clang 3.2?</h2> -<!-- ======================================================================= --> - -<p>Some of the major new features and improvements to Clang are listed here. - Generic improvements to Clang as a whole or to its underlying infrastructure - are described first, followed by language-specific sections with improvements - to Clang's support for those languages.</p> - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="majorfeatures">Major New Features</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -<h4 id="diagnostics">Improvements to Clang's diagnostics</h4> - -<p>Clang's diagnostics are constantly being improved to catch more issues, -explain them more clearly, and provide more accurate source information about -them. The improvements since the 3.1 release include:</p> - -<ul> - <li><tt>-Wuninitialized</tt> has been taught to recognize uninitialized uses - which always occur when an explicitly-written non-constant condition is either - <tt>true</tt> or <tt>false</tt>. 
For example: - -<pre> -int f(bool b) { - int n; - if (b) - n = 1; - return n; -} - -<b>sometimes-uninit.cpp:3:7: <span class="warning">warning:</span> variable 'n' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]</b> - if (b) - <span class="caret">^</span> -<b>sometimes-uninit.cpp:5:10: <span class="note">note:</span></b> uninitialized use occurs here - return n; - <span class="caret">^</span> -<b>sometimes-uninit.cpp:3:3: <span class="note">note:</span></b> remove the 'if' if its condition is always true - if (b) - <span class="caret">^~~~~~</span> -<b>sometimes-uninit.cpp:2:8: <span class="note">note:</span></b> initialize the variable 'n' to silence this warning - int n; - <span class="caret">^</span> - <span class="caret"> = 0</span> -</pre> - - This functionality can be enabled or disabled separately from - <tt>-Wuninitialized</tt> with the <tt>-Wsometimes-uninitialized</tt> warning - flag.</li> - - <li>Template type diffing improves the display of diagnostics with templated - types in them. - -<pre> -int f(vector<map<int, double>>); -int x = f(vector<map<int, float>>()); -</pre> - The error message is the same, but the note is different based on the options selected. 
-<pre> -<b>template-diff.cpp:5:9: <span class="error">error:</span> no matching function for call to 'f'</b> -int x = f(vector<map<int, float>>()); - <span class="caret">^</span> -</pre> - Templated type diffing with type elision (default): -<pre> -<b>template-diff.cpp:4:5: <span class="note">note:</span></b> candidate function not viable: no known conversion from 'vector<map<[...], <span class="template-highlight">float</span>>>' to 'vector<map<[...], <span class="template-highlight">double</span>>>' for 1st argument; -int f(vector<map<int, double>>); - <span class="caret">^</span> -</pre> - Templated type diffing without type elision (-fno-elide-type): -<pre> -<b>template-diff.cpp:4:5: <span class="note">note:</span></b> candidate function not viable: no known conversion from 'vector<map<int, <span class="template-highlight">float</span>>>' to 'vector<map<int, <span class="template-highlight">double</span>>>' for 1st argument; -int f(vector<map<int, double>>); - <span class="caret">^</span> -</pre> - Templated tree printing with type elision (-fdiagnostics-show-template-tree): -<pre> -<b>template-diff.cpp:4:5: <span class="note">note:</span></b> candidate function not viable: no known conversion for 1st argument; - vector< - map< - [...], - [<span class="template-highlight">float</span> != <span class="template-highlight">double</span>]>> -int f(vector<map<int, double>>); - <span class="caret">^</span> -</pre> - Templated tree printing without type elision (-fdiagnostics-show-template-tree -fno-elide-type): -<pre> -<b>template-diff.cpp:4:5: <span class="note">note:</span></b> candidate function not viable: no known conversion for 1st argument; - vector< - map< - int, - [<span class="template-highlight">float</span> != <span class="template-highlight">double</span>]>> -int f(vector<map<int, double>>); - <span class="caret">^</span> -</pre> - - </li> - - <li>The Address Sanitizer feature and Clang's <tt>-fcatch-undefined-behavior</tt> option have been moved to a 
unified flag set: - <tt>-fsanitize</tt>. This flag can be used to enable the different dynamic checking tools when building. For example, - <tt>-faddress-sanitizer</tt> is now <tt>-fsanitize=address</tt>, and <tt>-fcatch-undefined-behavior</tt> is now - <tt>-fsanitize=undefined</tt>. With this release the set of checks available continues to grow, see the Clang - documentation and specific sanitizer notes below for details. - </li> - -</ul> - -<h4 id="tlsmodel">Support for <code>tls_model</code> attribute</h4> - -<p>Clang now supports the <code>tls_model</code> attribute, allowing code that -uses thread-local storage to explicitly select which model to use. The available -models are <code>"global-dynamic"</code>, <code>"local-dynamic"</code>, -<code>"initial-exec"</code> and <code>"local-exec"</code>. See -<a href="http://www.akkadia.org/drepper/tls.pdf">ELF Handling For Thread-Local - Storage</a> for more information.</p> - -<p>The compiler is free to choose a different model if the specified model is not -supported by the target, or if the compiler determines that a more specific -model can be used. 
-</p> - -<h4>Type safety attributes</h4> -<p>Clang now supports type safety attributes that allow checking during compile -time that 'void *' function arguments and arguments for variadic functions are -of a particular type which is determined by some other argument to the same -function call.</p> - -<p>Usecases include:</p> -<ul> -<li>MPI library implementations, where these attributes enable checking that - buffer type matches the passed <code>MPI_Datatype</code>;</li> -<li> HDF5 library -- similar usecase as for MPI;</li> -<li> checking types of variadic functions' arguments for functions like -<code>fcntl()</code> and <code>ioctl()</code>.</li> -</ul> - -<p>See entries for <code>argument_with_type_tag</code>, -<code>pointer_with_type_tag</code> and <code>type_tag_for_datatype</code> -attributes in Clang language extensions documentation.</p> - -<h4>Documentation comment support</h4> -<p>Clang now supports documentation comments written in a Doxygen-like syntax. -Clang parses the comments and can detect syntactic and semantic errors in -comments. These warnings are off by default. Pass <tt>-Wdocumentation</tt> -flag to enable warnings about documentation comments.</p> - -<p>For example, given:</p> - -<pre>/// \param [in] Str the string. -/// \returns a modified string. -void do_something(const std::string &str);</pre> - -<p><tt>clang -Wdocumentation</tt> will emit two warnings:</p> - -<pre><b>doc-test.cc:3:6: <span class="warning">warning:</span></b> '\returns' command used in a comment that is attached to a function returning void [-Wdocumentation] -/// \returns a modified string. - <span class="caret">~^~~~~~~~~~~~~~~~~~~~~~~~~~</span> -<b>doc-test.cc:2:17: <span class="warning">warning:</span></b> parameter 'Str' not found in the function declaration [-Wdocumentation] -/// \param [in] Str the string. - <span class="caret">^~~</span> -<b>doc-test.cc:2:17: <span class="note">note:</span></b> did you mean 'str'? -/// \param [in] Str the string. 
- <span class="caret">^~~</span> - <span class="caret">str</span></pre> - -<p>libclang includes a new API, <tt>clang_FullComment_getAsXML</tt>, to convert -comments to XML documents. This API can be used to build documentation -extraction tools.</p> - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="newflags">New Compiler Flags</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<ul> - <li><tt>-gline-tables-only</tt> controls the - <a href="http://clang.llvm.org/docs/UsersManual.html#debuginfosize">size of debug information</a>. - This flag tells Clang to emit debug info which is just enough to obtain stack traces with - function names, file names and line numbers (by such tools as gdb or addr2line). - Debug info for variables or function parameters is not produced, which reduces - the size of the resulting binary. - - <li><tt>-ftls-model</tt> controls which TLS model to use for thread-local - variables. This can be overridden per variable using the - <a href="#tlsmodel"><tt>tls_model</tt> attribute</a> mentioned above. 
- For more details, see the <a href="UsersManual.html#opt_ftls-model">User's - Manual</a>.</li> -</ul> - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="cchanges">C Language Changes in Clang</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -<h4 id="c11changes">C11 Feature Support</h4> - -<p>Clang 3.2 adds support for the C11 <code>_Alignof</code> keyword, pedantic warning through option - <code>-Wempty-translation-unit</code> (C11 6.9p1) </p> - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="cxxchanges">C++ Language Changes in Clang</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -<h4 id="cxx11changes">C++11 Feature Support</h4> - -<p>Clang 3.2 supports <a href="http://clang.llvm.org/cxx_status.html#cxx11">most of the language features</a> - added in the latest ISO C++ standard,<a href="http://www.iso.org/iso/iso_catalogue/catalogue_tc/catalogue_detail.htm?csnumber=50372">C++ 2011</a>. - Use <code>-std=c++11</code> or <code>-std=gnu++11</code> to enable support for these features. 
In addition to the features supported by Clang 3.1, the - following features have been added:</p> - -<ul> - <li>Implemented the C++11 discarded value expression rules for volatile lvalues.</li> - <li>Support for the C++11 enum forward declarations.</li> - <li>Handling of C++11 attribute namespaces (automatically).</li> - <li>Implemented C++11 [conv.prom]p4: an enumeration with a fixed underlying type has integral promotions - to both its underlying type and to its underlying type's promoted type.</li> -</ul> - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="objcchanges">Objective-C Language Changes in Clang</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -<p>Bug-fixes, no functionality changes.</p> - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="pythonchanges">Python Binding Changes</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -The following classes and methods have been added: -<ul> - <li>class CompilationDatabaseError(Exception)</li> - <li>class CompileCommand(object)</li> - <li>class CompileCommands(object)</li> - <li>class CompilationDatabase(ClangObject)</li> - <li>Cursor.is_static_method</li> - <li>Cursor.is_static_method</li> - <li>SourceLocation.from_offset</li> - <li>Cursor.is_static_method</li> -</ul> - -<!-- ======================================================================= --> -<h2 id="additionalinfo">Additional Information</h2> -<!-- ======================================================================= --> - -<p>A wide variety of additional information is available on the - <a href="http://clang.llvm.org/">Clang web page</a>. The web page contains - versions of the API documentation which are up-to-date with the Subversion - version of the source code. 
You can access versions of these documents - specific to this release by going into the "<tt>clang/doc/</tt>" directory in - the Clang tree.</p> - -<p>If you have any questions or comments about Clang, please feel free to - contact us via - the <a href="http://lists.cs.uiuc.edu/mailman/listinfo/cfe-dev"> mailing - list</a>.</p> - - -</div> -</body> -</html> diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst new file mode 100644 index 0000000..d9a3364 --- /dev/null +++ b/docs/ReleaseNotes.rst @@ -0,0 +1,147 @@ +===================================== +Clang 3.3 (In-Progress) Release Notes +===================================== + +.. contents:: + :local: + :depth: 2 + +Written by the `LLVM Team <http://llvm.org/>`_ + +.. warning:: + + These are in-progress notes for the upcoming Clang 3.3 release. You may + prefer the `Clang 3.2 Release Notes + <http://llvm.org/releases/3.2/docs/ClangReleaseNotes.html>`_. + +Introduction +============ + +This document contains the release notes for the Clang C/C++/Objective-C +frontend, part of the LLVM Compiler Infrastructure, release 3.3. Here we +describe the status of Clang in some detail, including major +improvements from the previous release and new feature work. For the +general LLVM release notes, see `the LLVM +documentation <http://llvm.org/docs/ReleaseNotes.html>`_. All LLVM +releases may be downloaded from the `LLVM releases web +site <http://llvm.org/releases/>`_. + +For more information about Clang or LLVM, including information about +the latest release, please see the `Clang Web +Site <http://clang.llvm.org>`_ or the `LLVM Web +Site <http://llvm.org>`_. + +Note that if you are reading this file from a Subversion checkout or the +main Clang web page, this document applies to the *next* release, not +the current one. To see the release notes for a specific release, please +see the `releases page <http://llvm.org/releases/>`_. + +What's New in Clang 3.3? 
+======================== + +Some of the major new features and improvements to Clang are listed +here. Generic improvements to Clang as a whole or to its underlying +infrastructure are described first, followed by language-specific +sections with improvements to Clang's support for those languages. + +Major New Features +------------------ + +Improvements to Clang's diagnostics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Clang's diagnostics are constantly being improved to catch more issues, +explain them more clearly, and provide more accurate source information +about them. The improvements since the 3.2 release include: + +- ... + +Extended Identifiers: Unicode Support and Universal Character Names +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Clang 3.3 includes support for *extended identifiers* in C99 and C++. +This feature allows identifiers to contain certain Unicode characters, as +specified by the active language standard; these characters can be written +directly in the source file using the UTF-8 encoding, or referred to using +*universal character names* (``\u00E0``, ``\U000000E0``). + +New Compiler Flags +------------------ + +- ... + +C Language Changes in Clang +--------------------------- + +C11 Feature Support +^^^^^^^^^^^^^^^^^^^ + +... + +C++ Language Changes in Clang +----------------------------- + +C++11 Feature Support +^^^^^^^^^^^^^^^^^^^^^ + +... + +Objective-C Language Changes in Clang +------------------------------------- + +... + +Internal API Changes +-------------------- + +These are major API changes that have happened since the 3.2 release of +Clang. If upgrading an external codebase that uses Clang as a library, +this section should help get you past the largest hurdles of upgrading. + +Value Casting +^^^^^^^^^^^^^ + +Certain type hierarchies (TypeLoc, CFGElement, ProgramPoint, and SVal) were +misusing the llvm::cast machinery to perform undefined operations. 
Their APIs +have been changed to use two member function templates that return values +instead of pointers or references - "T castAs" and "Optional<T> getAs" (in the +case of the TypeLoc hierarchy the latter is "T getAs" and you can use the +boolean testability of a TypeLoc (or its 'validity') to verify that the cast +succeeded). Essentially all previous 'cast' usage should be replaced with +'castAs' and 'dyn_cast' should be replaced with 'getAs'. See r175462 for the +first example of such a change along with many examples of how code was +migrated to the new API. + +Storage Class +^^^^^^^^^^^^^ + +For each variable and function Clang used to keep the storage class as written +in the source, the linkage and a semantic storage class. This was a bit +redundant and the semantic storage class has been removed. The method +getStorageClass now returns what is written in the source code for that decl. + +... + +Python Binding Changes +---------------------- + +The following methods have been added: + +- ... + +Significant Known Problems +========================== + +Additional Information +====================== + +A wide variety of additional information is available on the `Clang web +page <http://clang.llvm.org/>`_. The web page contains versions of the +API documentation which are up-to-date with the Subversion version of +the source code. You can access versions of these documents specific to +this release by going into the "``clang/docs/``" directory in the Clang +tree. + +If you have any questions or comments about Clang, please feel free to +contact us via the `mailing +list <http://lists.cs.uiuc.edu/mailman/listinfo/cfe-dev>`_. 
diff --git a/docs/ThreadSanitizer.html b/docs/ThreadSanitizer.html deleted file mode 100644 index aa251c1..0000000 --- a/docs/ThreadSanitizer.html +++ /dev/null @@ -1,126 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<!-- Material used from: HTML 4.01 specs: http://www.w3.org/TR/html401/ --> -<html> -<head> - <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> - <title>ThreadSanitizer, a race detector</title> - <link type="text/css" rel="stylesheet" href="../menu.css"> - <link type="text/css" rel="stylesheet" href="../content.css"> - <style type="text/css"> - td { - vertical-align: top; - } - </style> -</head> -<body> - -<!--#include virtual="../menu.html.incl"--> - -<div id="content"> - -<h1>ThreadSanitizer</h1> -<ul> - <li> <a href="#intro">Introduction</a> - <li> <a href="#howtobuild">How to Build</a> - <li> <a href="#platforms">Supported Platforms</a> - <li> <a href="#usage">Usage</a> - <li> <a href="#limitations">Limitations</a> - <li> <a href="#status">Current Status</a> - <li> <a href="#moreinfo">More Information</a> -</ul> - -<h2 id="intro">Introduction</h2> -ThreadSanitizer is a tool that detects data races. <BR> -It consists of a compiler instrumentation module and a run-time library. <BR> -Typical slowdown introduced by ThreadSanitizer is <b>5x-15x</b> (TODO: these numbers are -approximate so far). - -<h2 id="howtobuild">How to build</h2> -Follow the <a href="../get_started.html">clang build instructions</a>. -CMake build is supported.<BR> - -<h2 id="platforms">Supported Platforms</h2> -ThreadSanitizer is supported on Linux x86_64 (tested on Ubuntu 10.04). <BR> -Support for MacOS 10.7 (64-bit only) is planned for late 2012. <BR> -Support for 32-bit platforms is problematic and not yet planned. 
- - - -<h2 id="usage">Usage</h2> -Simply compile your program with <tt>-fsanitize=thread -fPIE</tt> and link it -with <tt>-fsanitize=thread -pie</tt>.<BR> -To get a reasonable performance add <tt>-O1</tt> or higher. <BR> -Use <tt>-g</tt> to get file names and line numbers in the warning messages. <BR> - -Example: -<pre> -% cat projects/compiler-rt/lib/tsan/output_tests/tiny_race.c -#include <pthread.h> -int Global; -void *Thread1(void *x) { - Global = 42; - return x; -} -int main() { - pthread_t t; - pthread_create(&t, NULL, Thread1, NULL); - Global = 43; - pthread_join(t, NULL); - return Global; -} -</pre> - -<pre> -% clang -fsanitize=thread -g -O1 tiny_race.c -fPIE -pie -</pre> - -If a bug is detected, the program will print an error message to stderr. -Currently, ThreadSanitizer symbolizes its output using an external -<tt>addr2line</tt> -process (this will be fixed in future). -<pre> -% TSAN_OPTIONS=strip_path_prefix=`pwd`/ # Don't print full paths. -% ./a.out 2> log -% cat log -WARNING: ThreadSanitizer: data race (pid=19219) - Write of size 4 at 0x7fcf47b21bc0 by thread 1: - #0 Thread1 tiny_race.c:4 (exe+0x00000000a360) - Previous write of size 4 at 0x7fcf47b21bc0 by main thread: - #0 main tiny_race.c:10 (exe+0x00000000a3b4) - Thread 1 (running) created at: - #0 pthread_create ??:0 (exe+0x00000000c790) - #1 main tiny_race.c:9 (exe+0x00000000a3a4) -</pre> - - -<h2 id="limitations">Limitations</h2> -<ul> -<li> ThreadSanitizer uses more real memory than a native run. -At the default settings the memory overhead is 9x plus 9Mb per each thread. -Settings with 5x and 3x overhead (but less accurate analysis) are also available. -<li> ThreadSanitizer maps (but does not reserve) a lot of virtual address space. -This means that tools like <tt>ulimit</tt> may not work as usually expected. -<li> Static linking is not supported. -<li> ThreadSanitizer requires <tt>-fPIE -pie</tt> -</ul> - - -<h2 id="status">Current Status</h2> -ThreadSanitizer is in alpha stage. 
-It is known to work on large C++ programs using pthreads, but we do not promise -anything (yet). <BR> -C++11 threading is not yet supported. <BR> -The test suite is integrated into CMake build and can be run with -<tt>make check-tsan</tt> command. <BR> - -We are actively working on enhancing the tool -- stay tuned. -Any help, especially in the form of minimized standalone tests is more than welcome. - -<h2 id="moreinfo">More Information</h2> -<a href="http://code.google.com/p/thread-sanitizer/">http://code.google.com/p/thread-sanitizer</a>. - - -</div> -</body> -</html> diff --git a/docs/ThreadSanitizer.rst b/docs/ThreadSanitizer.rst new file mode 100644 index 0000000..c0c576b --- /dev/null +++ b/docs/ThreadSanitizer.rst @@ -0,0 +1,126 @@ +ThreadSanitizer +=============== + +Introduction +------------ + +ThreadSanitizer is a tool that detects data races. It consists of a compiler +instrumentation module and a run-time library. Typical slowdown introduced by +ThreadSanitizer is about **5x-15x**. Typical memory overhead introduced by +ThreadSanitizer is about **5x-10x**. + +How to build +------------ + +Follow the `Clang build instructions <../get_started.html>`_. CMake build is +supported. + +Supported Platforms +------------------- + +ThreadSanitizer is supported on Linux x86_64 (tested on Ubuntu 10.04 and 12.04). +Support for MacOS 10.7 (64-bit only) is planned for 2013. Support for 32-bit +platforms is problematic and not yet planned. + +Usage +----- + +Simply compile your program with ``-fsanitize=thread -fPIE`` and link it with +``-fsanitize=thread -pie``. To get a reasonable performance add ``-O1`` or +higher. Use ``-g`` to get file names and line numbers in the warning messages. + +Example: + +.. 
code-block:: c++ + + % cat projects/compiler-rt/lib/tsan/lit_tests/tiny_race.c + #include <pthread.h> + int Global; + void *Thread1(void *x) { + Global = 42; + return x; + } + int main() { + pthread_t t; + pthread_create(&t, NULL, Thread1, NULL); + Global = 43; + pthread_join(t, NULL); + return Global; + } + + $ clang -fsanitize=thread -g -O1 tiny_race.c -fPIE -pie + +If a bug is detected, the program will print an error message to stderr. +Currently, ThreadSanitizer symbolizes its output using an external +``addr2line`` process (this will be fixed in future). + +.. code-block:: bash + + % ./a.out + WARNING: ThreadSanitizer: data race (pid=19219) + Write of size 4 at 0x7fcf47b21bc0 by thread T1: + #0 Thread1 tiny_race.c:4 (exe+0x00000000a360) + + Previous write of size 4 at 0x7fcf47b21bc0 by main thread: + #0 main tiny_race.c:10 (exe+0x00000000a3b4) + + Thread T1 (running) created at: + #0 pthread_create tsan_interceptors.cc:705 (exe+0x00000000c790) + #1 main tiny_race.c:9 (exe+0x00000000a3a4) + +``__has_feature(thread_sanitizer)`` +------------------------------------ + +In some cases one may need to execute different code depending on whether +ThreadSanitizer is enabled. +:ref:`\_\_has\_feature <langext-__has_feature-__has_extension>` can be used for +this purpose. + +.. code-block:: c + + #if defined(__has_feature) + # if __has_feature(thread_sanitizer) + // code that builds only under ThreadSanitizer + # endif + #endif + +``__attribute__((no_sanitize_thread))`` +----------------------------------------------- + +Some code should not be instrumented by ThreadSanitizer. +One may use the function attribute +:ref:`no_sanitize_thread <langext-thread_sanitizer>` +to disable instrumentation of plain (non-atomic) loads/stores in a particular function. +ThreadSanitizer may still instrument such functions to avoid false positives. +This attribute may not be +supported by other compilers, so we suggest to use it together with +``__has_feature(thread_sanitizer)``. 
Note: currently, this attribute will be +lost if the function is inlined. + +Limitations +----------- + +* ThreadSanitizer uses more real memory than a native run. At the default + settings the memory overhead is 5x plus 1Mb per each thread. Settings with 3x + (less accurate analysis) and 9x (more accurate analysis) overhead are also + available. +* ThreadSanitizer maps (but does not reserve) a lot of virtual address space. + This means that tools like ``ulimit`` may not work as usually expected. +* Libc/libstdc++ static linking is not supported. +* ThreadSanitizer requires ``-fPIE -pie`` compiler flags. + +Current Status +-------------- + +ThreadSanitizer is in beta stage. It is known to work on large C++ programs +using pthreads, but we do not promise anything (yet). C++11 threading is +supported with llvm libc++. The test suite is integrated into CMake build +and can be run with ``make check-tsan`` command. + +We are actively working on enhancing the tool --- stay tuned. Any help, +especially in the form of minimized standalone tests is more than welcome. + +More Information +---------------- +`http://code.google.com/p/thread-sanitizer <http://code.google.com/p/thread-sanitizer/>`_. + diff --git a/docs/Tooling.html b/docs/Tooling.html deleted file mode 100644 index 74837f4..0000000 --- a/docs/Tooling.html +++ /dev/null @@ -1,120 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> -<title>Writing Clang Tools</title> -<link type="text/css" rel="stylesheet" href="../menu.css"> -<link type="text/css" rel="stylesheet" href="../content.css"> -</head> -<body> - -<!--#include virtual="../menu.html.incl"--> - -<div id="content"> - -<h1>Writing Clang Tools</h1> -<p>Clang provides infrastructure to write tools that need syntactic and semantic -information about a program. 
This document will give a short introduction of the -different ways to write clang tools, and their pros and cons.</p> - -<!-- ======================================================================= --> -<h2 id="libclang"><a href="http://clang.llvm.org/doxygen/group__CINDEX.html">LibClang</a></h2> -<!-- ======================================================================= --> - -<p>LibClang is a stable high level C interface to clang. When in doubt LibClang -is probably the interface you want to use. Consider the other interfaces only -when you have a good reason not to use LibClang.</p> -<p>Canonical examples of when to use LibClang:</p> -<ul> - <li>Xcode</li> - <li>Clang Python Bindings</li> -</ul> -<p>Use LibClang when you...</p> -<ul> - <li>want to interface with clang from other languages than C++</li> - <li>need a stable interface that takes care to be backwards compatible</li> - <li>want powerful high-level abstractions, like iterating through an AST -with a cursor, and don't want to learn all the nitty gritty details of Clang's -AST.</li> -</ul> -<p>Do not use LibClang when you...</p> -<ul> - <li>want full control over the Clang AST</li> -</ul> - -<!-- ======================================================================= --> -<h2 id="clang-plugins"><a href="ClangPlugins.html">Clang Plugins</a></h2> -<!-- ======================================================================= --> - -<p>Clang Plugins allow you to run additional actions on the AST as part of -a compilation. 
Plugins are dynamic libraries that are loaded at runtime by -the compiler, and they're easy to integrate into your build environment.</p> -<p>Canonical examples of when to use Clang Plugins:</p> -<ul> - <li>special lint-style warnings or errors for your project</li> - <li>creating additional build artifacts from a single compile step</li> -</ul> -<p>Use Clang Plugins when you...</p> -<ul> - <li>need your tool to rerun if any of the dependencies change</li> - <li>want your tool to make or break a build</li> - <li>need full control over the Clang AST</li> -</ul> -<p>Do not use Clang Plugins when you...</p> -<ul> - <li>want to run tools outside of your build environment</li> - <li>want full control on how Clang is set up, including mapping of in-memory - virtual files</li> - <li>need to run over a specific subset of files in your project which is not - necessarily related to any changes which would trigger rebuilds</li> -</ul> - -<!-- ======================================================================= --> -<h2 id="libtooling"><a href="LibTooling.html">LibTooling</a></h2> -<!-- ======================================================================= --> - -<p>LibTooling is a C++ interface aimed at writing standalone tools, as well as -integrating into services that run clang tools.</p> -<p>Canonical examples of when to use LibTooling:</p> -<ul> - <li>a simple syntax checker</li> - <li>refactoring tools</li> -</ul> -<p>Use LibTooling when you...</p> -<ul> - <li>want to run tools over a single file, or a specific subset of files, - independently of the build system</li> - <li>want full control over the Clang AST</li> - <li>want to share code with Clang Plugins</li> -</ul> -<p>Do not use LibTooling when you...</p> -<ul> - <li>want to run as part of the build triggered by dependency changes</li> - <li>want a stable interface so you don't need to change your code when the - AST API changes</li> - <li>want high level abstractions like cursors and code completion out of 
the - box</li> - <li>do not want to write your tools in C++</li> -</ul> - -<!-- ======================================================================= --> -<h2 id="clang-tools"><a href="ClangTools.html">Clang Tools</a></h2> -<!-- ======================================================================= --> - -<p>These are a collection of specific developer tools built on top of the -LibTooling infrastructure as part of the Clang project. They are targeted at -automating and improving core development activities of C/C++ developers.</p> -<p>Examples of tools we are building or planning as part of the Clang -project:</p> -<ul> - <li>Syntax checking (clang-check)</li> - <li>Automatic fixing of compile errors (clangc-fixit)</li> - <li>Automatic code formatting</li> - <li>Migration tools for new features in new language standards</li> - <li>Core refactoring tools</li> -</ul> - -</div> -</body> -</html> - diff --git a/docs/Tooling.rst b/docs/Tooling.rst new file mode 100644 index 0000000..25ee215 --- /dev/null +++ b/docs/Tooling.rst @@ -0,0 +1,97 @@ +================================================= +Choosing the Right Interface for Your Application +================================================= + +Clang provides infrastructure to write tools that need syntactic and semantic +information about a program. This document will give a short introduction of +the different ways to write clang tools, and their pros and cons. + +LibClang +-------- + +`LibClang <http://clang.llvm.org/doxygen/group__CINDEX.html>`_ is a stable high +level C interface to clang. When in doubt LibClang is probably the interface +you want to use. Consider the other interfaces only when you have a good +reason not to use LibClang. 
+ +Canonical examples of when to use LibClang: + +* Xcode +* Clang Python Bindings + +Use LibClang when you...: + +* want to interface with clang from other languages than C++ +* need a stable interface that takes care to be backwards compatible +* want powerful high-level abstractions, like iterating through an AST with a + cursor, and don't want to learn all the nitty gritty details of Clang's AST. + +Do not use LibClang when you...: + +* want full control over the Clang AST + +Clang Plugins +------------- + +:doc:`Clang Plugins <ClangPlugins>` allow you to run additional actions on the +AST as part of a compilation. Plugins are dynamic libraries that are loaded at +runtime by the compiler, and they're easy to integrate into your build +environment. + +Canonical examples of when to use Clang Plugins: + +* special lint-style warnings or errors for your project +* creating additional build artifacts from a single compile step + +Use Clang Plugins when you...: + +* need your tool to rerun if any of the dependencies change +* want your tool to make or break a build +* need full control over the Clang AST + +Do not use Clang Plugins when you...: + +* want to run tools outside of your build environment +* want full control on how Clang is set up, including mapping of in-memory + virtual files +* need to run over a specific subset of files in your project which is not + necessarily related to any changes which would trigger rebuilds + +LibTooling +---------- + +:doc:`LibTooling <LibTooling>` is a C++ interface aimed at writing standalone +tools, as well as integrating into services that run clang tools. 
Canonical +examples of when to use LibTooling: + +* a simple syntax checker +* refactoring tools + +Use LibTooling when you...: + +* want to run tools over a single file, or a specific subset of files, + independently of the build system +* want full control over the Clang AST +* want to share code with Clang Plugins + +Do not use LibTooling when you...: + +* want to run as part of the build triggered by dependency changes +* want a stable interface so you don't need to change your code when the AST API + changes +* want high level abstractions like cursors and code completion out of the box +* do not want to write your tools in C++ + +:doc:`Clang tools <ClangTools>` are a collection of specific developer tools +built on top of the LibTooling infrastructure as part of the Clang project. +They are targeted at automating and improving core development activities of +C/C++ developers. + +Examples of tools we are building or planning as part of the Clang project: + +* Syntax checking (:program:`clang-check`) +* Automatic fixing of compile errors (:program:`clang-fixit`) +* Automatic code formatting (:program:`clang-format`) +* Migration tools for new features in new language standards +* Core refactoring tools + diff --git a/docs/UsersManual.html b/docs/UsersManual.html deleted file mode 100644 index 35fc5dc..0000000 --- a/docs/UsersManual.html +++ /dev/null @@ -1,1309 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> -<title>Clang Compiler User's Manual</title> -<link type="text/css" rel="stylesheet" href="../menu.css"> -<link type="text/css" rel="stylesheet" href="../content.css"> -<style type="text/css"> -td { - vertical-align: top; -} -</style> -</head> -<body> - -<!--#include virtual="../menu.html.incl"--> - -<div id="content"> - -<h1>Clang Compiler User's Manual</h1> - -<ul> -<li><a href="#intro">Introduction</a> - <ul> - <li><a href="#terminology">Terminology</a></li> - <li><a 
href="#basicusage">Basic Usage</a></li> - </ul> -</li> -<li><a href="#commandline">Command Line Options</a> - <ul> - <li><a href="#cl_diagnostics">Options to Control Error and Warning - Messages</a></li> - <li><a href="#cl_crash_diagnostics">Options to Control Clang Crash - Diagnostics</a></li> - </ul> -</li> -<li><a href="#general_features">Language and Target-Independent Features</a> - <ul> - <li><a href="#diagnostics">Controlling Errors and Warnings</a> - <ul> - <li><a href="#diagnostics_display">Controlling How Clang Displays Diagnostics</a></li> - <li><a href="#diagnostics_mappings">Diagnostic Mappings</a></li> - <li><a href="#diagnostics_categories">Diagnostic Categories</a></li> - <li><a href="#diagnostics_commandline">Controlling Diagnostics via Command Line Flags</a></li> - <li><a href="#diagnostics_pragmas">Controlling Diagnostics via Pragmas</a></li> - <li><a href="#diagnostics_systemheader">Controlling Diagnostics in System Headers</a></li> - <li><a href="#diagnostics_enable_everything">Enabling All Warnings</a></li> - <li><a href="#analyzer_diagnositics">Controlling Static Analyzer Diagnostics</a></li> - </ul> - </li> - <li><a href="#precompiledheaders">Precompiled Headers</a></li> - <li><a href="#codegen">Controlling Code Generation</a></li> - <li><a href="#debuginfosize">Controlling Size of Debug Information</a></li> - </ul> -</li> -<li><a href="#c">C Language Features</a> - <ul> - <li><a href="#c_ext">Extensions supported by clang</a></li> - <li><a href="#c_modes">Differences between various standard modes</a></li> - <li><a href="#c_unimpl_gcc">GCC extensions not implemented yet</a></li> - <li><a href="#c_unsupp_gcc">Intentionally unsupported GCC extensions</a></li> - <li><a href="#c_ms">Microsoft extensions</a></li> - </ul> -</li> -<li><a href="#cxx">C++ Language Features</a> - <ul> - <li><a href="#cxx_implimits">Controlling implementation limits</a></li> - </ul> -</li> -<li><a href="#target_features">Target-Specific Features and Limitations</a> - 
<ul> - <li><a href="#target_arch">CPU Architectures Features and Limitations</a> - <ul> - <li><a href="#target_arch_x86">X86</a></li> - <li><a href="#target_arch_arm">ARM</a></li> - <li><a href="#target_arch_other">Other platforms</a></li> - </ul> - </li> - <li><a href="#target_os">Operating System Features and Limitations</a> - <ul> - <li><a href="#target_os_darwin">Darwin (Mac OS/X)</a></li> - <li>Linux, etc.</li> - <li><a href="#target_os_win32">Windows</a></li> - </ul> - </li> - </ul> -</li> -</ul> - - -<!-- ======================================================================= --> -<h2 id="intro">Introduction</h2> -<!-- ======================================================================= --> - -<p>The Clang Compiler is an open-source compiler for the C family of programming -languages, aiming to be the best in class implementation of these languages. -Clang builds on the LLVM optimizer and code generator, allowing it to provide -high-quality optimization and code generation support for many targets. For -more general information, please see the <a href="http://clang.llvm.org">Clang -Web Site</a> or the <a href="http://llvm.org">LLVM Web Site</a>.</p> - -<p>This document describes important notes about using Clang as a compiler for -an end-user, documenting the supported features, command line options, etc. If -you are interested in using Clang to build a tool that processes code, please -see <a href="InternalsManual.html">the Clang Internals Manual</a>. If you are -interested in the <a href="http://clang-analyzer.llvm.org">Clang -Static Analyzer</a>, please see its web page.</p> - -<p>Clang is designed to support the C family of programming languages, which -includes <a href="#c">C</a>, <a href="#objc">Objective-C</a>, <a -href="#cxx">C++</a>, and <a href="#objcxx">Objective-C++</a> as well as many -dialects of those. 
For language-specific information, please see the -corresponding language specific section:</p> - -<ul> -<li><a href="#c">C Language</a>: K&R C, ANSI C89, ISO C90, ISO C94 - (C89+AMD1), ISO C99 (+TC1, TC2, TC3). </li> -<li><a href="#objc">Objective-C Language</a>: ObjC 1, ObjC 2, ObjC 2.1, plus - variants depending on base language.</li> -<li><a href="#cxx">C++ Language</a></li> -<li><a href="#objcxx">Objective C++ Language</a></li> -</ul> - -<p>In addition to these base languages and their dialects, Clang supports a -broad variety of language extensions, which are documented in the corresponding -language section. These extensions are provided to be compatible with the GCC, -Microsoft, and other popular compilers as well as to improve functionality -through Clang-specific features. The Clang driver and language features are -intentionally designed to be as compatible with the GNU GCC compiler as -reasonably possible, easing migration from GCC to Clang. In most cases, code -"just works".</p> - -<p>In addition to language specific features, Clang has a variety of features -that depend on what CPU architecture or operating system is being compiled for. 
-Please see the <a href="#target_features">Target-Specific Features and -Limitations</a> section for more details.</p> - -<p>The rest of the introduction introduces some basic <a -href="#terminology">compiler terminology</a> that is used throughout this manual -and contains a basic <a href="#basicusage">introduction to using Clang</a> -as a command line compiler.</p> - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="terminology">Terminology</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -<p>Front end, parser, backend, preprocessor, undefined behavior, diagnostic, - optimizer</p> - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="basicusage">Basic Usage</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -<p>Intro to how to use a C compiler for newbies.</p> -<p> -compile + link - -compile then link - -debug info - -enabling optimizations - -picking a language to use, defaults to C99 by default. Autosenses based on -extension. - -using a makefile -</p> - - -<!-- ======================================================================= --> -<h2 id="commandline">Command Line Options</h2> -<!-- ======================================================================= --> - -<p> -This section is generally an index into other sections. It does not go into -depth on the ones that are covered by other sections. However, the first part -introduces the language selection and other high level options like -c, -g, etc. 
-</p> - - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="cl_diagnostics">Options to Control Error and Warning Messages</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -<p><b>-Werror</b>: Turn warnings into errors.</p> -<p><b>-Werror=foo</b>: Turn warning "foo" into an error.</p> -<p><b>-Wno-error=foo</b>: Turn warning "foo" into a warning even if -Werror is - specified.</p> -<p><b>-Wfoo</b>: Enable warning "foo".</p> -<p><b>-Wno-foo</b>: Disable warning "foo".</p> -<p><b>-w</b>: Disable all warnings.</p> -<p><b>-Weverything</b>: <a href="#diagnostics_enable_everything">Enable <b>all</b> warnings.</a></p> -<p><b>-pedantic</b>: Warn on language extensions.</p> -<p><b>-pedantic-errors</b>: Error on language extensions.</p> -<p><b>-Wsystem-headers</b>: Enable warnings from system headers.</p> - -<p><b>-ferror-limit=123</b>: Stop emitting diagnostics after 123 errors have - been produced. The default is 20, and the error limit can be disabled with - -ferror-limit=0.</p> - -<p><b>-ftemplate-backtrace-limit=123</b>: Only emit up to 123 template instantiation notes within the template instantiation backtrace for a single warning or error. The default is 10, and the limit can be disabled with -ftemplate-backtrace-limit=0.</p> - -<!-- ================================================= --> -<h4 id="cl_diag_formatting">Formatting of Diagnostics</h4> -<!-- ================================================= --> - -<p>Clang aims to produce beautiful diagnostics by default, particularly for new -users that first come to Clang. However, different people have different -preferences, and sometimes Clang is driven by another program that wants to -parse simple and consistent output, not a person. 
For these cases, Clang -provides a wide range of options to control the exact output format of the -diagnostics that it generates.</p> - -<dl> - -<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --> -<dt id="opt_fshow-column"><b>-f[no-]show-column</b>: Print column number in -diagnostic.</dt> -<dd>This option, which defaults to on, controls whether or not Clang prints the -column number of a diagnostic. For example, when this is enabled, Clang will -print something like: - -<pre> - test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] - #endif bad - ^ - // -</pre> - -<p>When this is disabled, Clang will print "test.c:28: warning..." with no -column number.</p> - -<p>The printed column numbers count bytes from the beginning of the line; take -care if your source contains multibyte characters.</p> -</dd> - -<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --> -<dt id="opt_fshow-source-location"><b>-f[no-]show-source-location</b>: Print -source file/line/column information in diagnostic.</dt> -<dd>This option, which defaults to on, controls whether or not Clang prints the -filename, line number and column number of a diagnostic. For example, -when this is enabled, Clang will print something like: - -<pre> - test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] - #endif bad - ^ - // -</pre> - -<p>When this is disabled, Clang will not print the "test.c:28:8: " part.</p> -</dd> - -<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --> -<dt id="opt_fcaret-diagnostics"><b>-f[no-]caret-diagnostics</b>: Print source -line and ranges from source code in diagnostic.</dt> -<dd>This option, which defaults to on, controls whether or not Clang prints the -source line, source ranges, and caret when emitting a diagnostic. 
For example, -when this is enabled, Clang will print something like: - -<pre> - test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] - #endif bad - ^ - // -</pre> -</dd> -<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --> -<dt id="opt_fcolor_diagnostics"><b>-f[no-]color-diagnostics</b>: </dt> -<dd>This option, which defaults to on when a color-capable terminal is - detected, controls whether or not Clang prints diagnostics in color. - When this option is enabled, Clang will use colors to highlight - specific parts of the diagnostic, e.g., - <pre> - <b><span style="color:black">test.c:28:8: <span style="color:magenta">warning</span>: extra tokens at end of #endif directive [-Wextra-tokens]</span></b> - #endif bad - <span style="color:green">^</span> - <span style="color:green">//</span> -</pre> - -<p>When this is disabled, Clang will just print:</p> - -<pre> - test.c:2:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] - #endif bad - ^ - // -</pre> -</dd> -<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --> -<dt id="opt_fdiagnostics-format"><b>-fdiagnostics-format=clang/msvc/vi</b>: -Changes diagnostic output format to better match IDEs and command line tools.</dt> -<dd>This option controls the output format of the filename, line number, and column printed in diagnostic messages. 
The options, and their effect on formatting a simple conversion diagnostic, follow: - - <dl> - <dt><b>clang</b> (default)</dt> - <dd> - <pre>t.c:3:11: warning: conversion specifies type 'char *' but the argument has type 'int'</pre> - </dd> - - <dt><b>msvc</b></dt> - <dd> - <pre>t.c(3,11) : warning: conversion specifies type 'char *' but the argument has type 'int'</pre> - </dd> - - <dt><b>vi</b></dt> - <dd> - <pre>t.c +3:11: warning: conversion specifies type 'char *' but the argument has type 'int'</pre> - </dd> - </dl> -</dd> - -<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --> -<dt id="opt_fdiagnostics-show-name"><b>-f[no-]diagnostics-show-name</b>: -Enable the display of the diagnostic name.</dt> -<dd>This option, which defaults to off, controls whether or not -Clang prints the associated name.<p></p></dd> -<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --> -<dt id="opt_fdiagnostics-show-option"><b>-f[no-]diagnostics-show-option</b>: -Enable <tt>[-Woption]</tt> information in diagnostic line.</dt> -<dd>This option, which defaults to on, -controls whether or not Clang prints the associated <A -href="#cl_diag_warning_groups">warning group</a> option name when outputting -a warning diagnostic. For example, in this output: - -<pre> - test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] - #endif bad - ^ - // -</pre> - -<p>Passing <b>-fno-diagnostics-show-option</b> will prevent Clang from printing -the [<a href="#opt_Wextra-tokens">-Wextra-tokens</a>] information in the -diagnostic. 
This information tells you the flag needed to enable or disable the -diagnostic, either from the command line or through <a -href="#pragma_GCC_diagnostic">#pragma GCC diagnostic</a>.</p></dd> - -<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --> -<dt id="opt_fdiagnostics-show-category"><b>-fdiagnostics-show-category=none/id/name</b>: -Enable printing category information in diagnostic line.</dt> -<dd>This option, which defaults to "none", -controls whether or not Clang prints the category associated with a diagnostic -when emitting it. Each diagnostic may or may not have an associated category; -if it has one, it is listed in the diagnostic categorization field of the -diagnostic line (in the []'s). - -<p>For example, a format string warning will produce these three renditions -based on the setting of this option:</p> - -<pre> - t.c:3:11: warning: conversion specifies type 'char *' but the argument has type 'int' [-Wformat] - t.c:3:11: warning: conversion specifies type 'char *' but the argument has type 'int' [-Wformat<b>,1</b>] - t.c:3:11: warning: conversion specifies type 'char *' but the argument has type 'int' [-Wformat<b>,Format String</b>] -</pre> - -<p>This category can be used by clients that want to group diagnostics by -category, so it should be a high level category. We want dozens of these, not -hundreds or thousands of them.</p> -</dd> - - - -<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --> -<dt id="opt_fdiagnostics-fixit-info"><b>-f[no-]diagnostics-fixit-info</b>: -Enable "FixIt" information in the diagnostics output.</dt> -<dd>This option, which defaults to on, controls whether or not Clang prints the -information on how to fix a specific diagnostic underneath it when it knows. 
-For example, in this output: - -<pre> - test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] - #endif bad - ^ - // -</pre> - -<p>Passing <b>-fno-diagnostics-fixit-info</b> will prevent Clang from printing -the "//" line at the end of the message. This information is useful for users -who may not understand what is wrong, but can be confusing for machine -parsing.</p> -</dd> - -<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --> -<dt id="opt_fdiagnostics-print-source-range-info"> -<b>-f[no-]diagnostics-print-source-range-info</b>: -Print machine parsable information about source ranges.</dt> -<dd>This option, which defaults to off, controls whether or not Clang prints -information about source ranges in a machine parsable format after the -file/line/column number information. The information is a simple sequence of -brace enclosed ranges, where each range lists the start and end line/column -locations. For example, in this output: - -<pre> -exprs.c:47:15:{47:8-47:14}{47:17-47:24}: error: invalid operands to binary expression ('int *' and '_Complex float') - P = (P-42) + Gamma*4; - ~~~~~~ ^ ~~~~~~~ -</pre> - -<p>The {}'s are generated by -fdiagnostics-print-source-range-info.</p> - -<p>The printed column numbers count bytes from the beginning of the line; take -care if your source contains multibyte characters.</p> -</dd> - -<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --> -<dt id="opt_fdiagnostics-parseable-fixits"> -<b>-fdiagnostics-parseable-fixits</b>: -Print Fix-Its in a machine parseable form.</dt> -<dd><p>This option makes Clang print available Fix-Its in a machine parseable format at the end of diagnostics. 
The following example illustrates the format:</p> - -<pre> - fix-it:"t.cpp":{7:25-7:29}:"Gamma" -</pre> - -<p>The range printed is a half-open range, so in this example the characters at -column 25 up to but not including column 29 on line 7 in t.cpp should be -replaced with the string "Gamma". Either the range or the replacement -string may be empty (representing strict insertions and strict erasures, -respectively). Both the file name and the insertion string escape backslash (as -"\\"), tabs (as "\t"), newlines (as "\n"), double -quotes (as "\"") and non-printable characters (as octal -"\xxx").</p> - -<p>The printed column numbers count bytes from the beginning of the line; take -care if your source contains multibyte characters.</p> -</dd> - -<dt id="opt_fno-elide-type"> -<b>-fno-elide-type</b>: -Turns off elision in template type printing.</dt> -<dd><p>The default for template type printing is to elide as many template -arguments as possible, removing those which are the same in both template types, -leaving only the differences. Adding this flag will print all the template -arguments. 
If supported by the terminal, highlighting will still appear on -differing arguments.</p> - -Default: -<pre> -t.cc:4:5: <span class="note">note</span>: candidate function not viable: no known conversion from 'vector<map<[...], map<<span class="template-highlight">float</span>, [...]>>>' to 'vector<map<[...], map<<span class="template-highlight">double</span>, [...]>>>' for 1st argument; -</pre> --fno-elide-type: -<pre> -t.cc:4:5: <span class="note">note</span>: candidate function not viable: no known conversion from 'vector<map<int, map<<span class="template-highlight">float</span>, int>>>' to 'vector<map<int, map<<span class="template-highlight">double</span>, int>>>' for 1st argument; -</pre> -</dd> - -<dt id="opt_fdiagnostics-show-template-tree"> -<b>-fdiagnostics-show-template-tree</b>: -Template type diffing prints a text tree.</dt> -<dd><p>For diffing large templated types, this option will cause Clang to -display the templates as an indented text tree, one argument per line, with -differences marked inline. This is compatible with -fno-elide-type.</p> - -Default: -<pre> -t.cc:4:5: <span class="note">note</span>: candidate function not viable: no known conversion from 'vector<map<[...], map<<span class="template-highlight">float</span>, [...]>>>' to 'vector<map<[...], map<<span class="template-highlight">double</span>, [...]>>>' for 1st argument; -</pre> --fdiagnostics-show-template-tree -<pre> -t.cc:4:5: <span class="note">note</span>: candidate function not viable: no known conversion for 1st argument; - vector< - map< - [...], - map< - [<span class="template-highlight">float</span> != <span class="template-highlight">double</span>], - [...]>>> -</pre> -</dd> - -</dl> - - - -<!-- ===================================================== --> -<h4 id="cl_diag_warning_groups">Individual Warning Groups</h4> -<!-- ===================================================== --> - -<p>TODO: Generate this from tblgen. 
Define one anchor per warning group.</p> - - -<dl> - - -<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --> -<dt id="opt_Wextra-tokens"><b>-Wextra-tokens</b>: Warn about excess tokens at - the end of a preprocessor directive.</dt> -<dd>This option, which defaults to on, enables warnings about extra tokens at -the end of preprocessor directives. For example: - -<pre> - test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] - #endif bad - ^ -</pre> - -<p>These extra tokens are not strictly conforming, and are usually best handled -by commenting them out.</p> -</dd> - -<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --> -<dt id="opt_Wambiguous-member-template"><b>-Wambiguous-member-template</b>: -Warn about unqualified uses of a member template whose name resolves -to another template at the location of the use.</dt> -<dd>This option, which defaults to on, enables a warning in the -following code: - -<pre> -template<typename T> struct set{}; -template<typename T> struct trait { typedef const T& type; }; -struct Value { - template<typename T> void set(typename trait<T>::type value) {} -}; -void foo() { - Value v; - v.set<double>(3.2); -} -</pre> - -<p>C++ [basic.lookup.classref] requires this to be an error, but, -because it's hard to work around, Clang downgrades it to a warning as -an extension.</p> -</dd> - -<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --> -<dt id="opt_Wbind-to-temporary-copy"><b>-Wbind-to-temporary-copy</b>: Warn about -an unusable copy constructor when binding a reference to a temporary.</dt> -<dd>This option, which defaults to on, enables warnings about binding a -reference to a temporary when the temporary doesn't have a usable copy -constructor. 
For example: - -<pre> - struct NonCopyable { - NonCopyable(); - private: - NonCopyable(const NonCopyable&); - }; - void foo(const NonCopyable&); - void bar() { - foo(NonCopyable()); // Disallowed in C++98; allowed in C++11. - } -</pre> -<pre> - struct NonCopyable2 { - NonCopyable2(); - NonCopyable2(NonCopyable2&); - }; - void foo(const NonCopyable2&); - void bar() { - foo(NonCopyable2()); // Disallowed in C++98; allowed in C++11. - } -</pre> - -<p>Note that if <tt>NonCopyable2::NonCopyable2()</tt> has a default -argument whose instantiation produces a compile error, that error will -still be a hard error in C++98 mode even if this warning is turned -off.</p> - -</dd> - -</dl> - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="cl_crash_diagnostics">Options to Control Clang Crash Diagnostics</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -<p>As unbelievable as it may sound, Clang does crash from time to time. -Generally, this only occurs to those living on the -<a href="http://llvm.org/releases/download.html#svn">bleeding edge</a>. Clang -goes to great lengths to assist you in filing a bug report. Specifically, Clang -generates preprocessed source file(s) and associated run script(s) upon a -crash. These files should be attached to a bug report to ease reproducibility -of the failure. Below are the command line options to control the crash -diagnostics. 
-</p> - -<p><b>-fno-crash-diagnostics</b>: Disable auto-generation of preprocessed -source files during a clang crash.</p> - -<p>The -fno-crash-diagnostics flag can be helpful for speeding the process of -generating a delta reduced test case.</p> - - -<!-- ======================================================================= --> -<h2 id="general_features">Language and Target-Independent Features</h2> -<!-- ======================================================================= --> - - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="diagnostics">Controlling Errors and Warnings</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -<p>Clang provides a number of ways to control which code constructs cause it to -emit errors and warning messages, and how they are displayed to the console.</p> - -<h4 id="diagnostics_display">Controlling How Clang Displays Diagnostics</h4> - -<p>When Clang emits a diagnostic, it includes rich information in the output, -and gives you fine-grain control over which information is printed. 
Clang has -the ability to print this information, and these are the options that control -it:</p> - -<ol> -<li>A file/line/column indicator that shows exactly where the diagnostic occurs - in your code [<a href="#opt_fshow-column">-fshow-column</a>, <a - href="#opt_fshow-source-location">-fshow-source-location</a>].</li> -<li>A categorization of the diagnostic as a note, warning, error, or fatal - error.</li> -<li>A text string that describes what the problem is.</li> -<li>An option that indicates how to control the diagnostic (for diagnostics that - support it) [<a - href="#opt_fdiagnostics-show-option">-fdiagnostics-show-option</a>].</li> -<li>A <a href="#diagnostics_categories">high-level category</a> for the - diagnostic for clients that want to group diagnostics by class (for - diagnostics that support it) [<a - href="#opt_fdiagnostics-show-category">-fdiagnostics-show-category</a>].</li> -<li>The line of source code that the issue occurs on, along with a caret and - ranges that indicate the important locations [<a - href="#opt_fcaret-diagnostics">-fcaret-diagnostics</a>].</li> -<li>"FixIt" information, which is a concise explanation of how to fix the - problem (when Clang is certain it knows) [<a - href="#opt_fdiagnostics-fixit-info">-fdiagnostics-fixit-info</a>].</li> -<li>A machine-parsable representation of the ranges involved (off by - default) [<a - href="#opt_fdiagnostics-print-source-range-info">-fdiagnostics-print-source-range-info</a>].</li> -</ol> - -<p>For more information please see <a href="#cl_diag_formatting">Formatting of -Diagnostics</a>.</p> - - -<h4 id="diagnostics_mappings">Diagnostic Mappings</h4> - -<p>All diagnostics are mapped into one of these 5 classes:</p> - -<ul> -<li>Ignored</li> -<li>Note</li> -<li>Warning</li> -<li>Error</li> -<li>Fatal</li> -</ul> - -<h4 id="diagnostics_categories">Diagnostic Categories</h4> - -<p>Though not shown by default, diagnostics may each be associated with a - high-level category. 
This category is intended to make it possible to triage - builds that produce a large number of errors or warnings in a grouped way. -</p> - -<p>Categories are not shown by default, but they can be turned on with the -<a href="#opt_fdiagnostics-show-category">-fdiagnostics-show-category</a> option. -When set to "<tt>name</tt>", the category is printed textually in the diagnostic -output. When it is set to "<tt>id</tt>", a category number is printed. The -mapping of category names to category id's can be obtained by running '<tt>clang - --print-diagnostic-categories</tt>'. -</p> - -<h4 id="diagnostics_commandline">Controlling Diagnostics via Command Line - Flags</h4> - -<p>TODO: -W flags, -pedantic, etc</p> - -<h4 id="diagnostics_pragmas">Controlling Diagnostics via Pragmas</h4> - -<p>Clang can also control what diagnostics are enabled through the use of -pragmas in the source code. This is useful for turning off specific warnings -in a section of source code. Clang supports GCC's pragma for compatibility -with existing source code, as well as several extensions. </p> - -<p>The pragma may control any warning that can be used from the command line. -Warnings may be set to ignored, warning, error, or fatal. The following -example code will tell Clang or GCC to ignore the -Wall warnings:</p> - -<pre> -#pragma GCC diagnostic ignored "-Wall" -</pre> - -<p>In addition to all of the functionality provided by GCC's pragma, Clang -also allows you to push and pop the current warning state. This is particularly -useful when writing a header file that will be compiled by other people, because -you don't know what warning flags they build with.</p> - -<p>In the below example --Wmultichar is ignored for only a single line of code, after which the -diagnostics return to whatever state had previously existed.</p> - -<pre> -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wmultichar" - -char b = 'df'; // no warning. 
- -#pragma clang diagnostic pop -</pre> - -<p>The push and pop pragmas will save and restore the full diagnostic state of -the compiler, regardless of how it was set. That means that it is possible to -use push and pop around GCC compatible diagnostics and Clang will push and pop -them appropriately, while GCC will ignore the pushes and pops as unknown -pragmas. It should be noted that while Clang supports the GCC pragma, Clang and -GCC do not support the exact same set of warnings, so even when using GCC -compatible #pragmas there is no guarantee that they will have identical behaviour -on both compilers. </p> - -<h4 id="diagnostics_systemheader">Controlling Diagnostics in System Headers</h4> - -<p>Warnings are suppressed when they occur in system headers. By default, an -included file is treated as a system header if it is found in an include path -specified by <tt>-isystem</tt>, but this can be overridden in several ways.</p> - -<p>The <tt>system_header</tt> pragma can be used to mark the current file as -being a system header. No warnings will be produced from the location of the -pragma onwards within the same file.</p> - -<pre> -char a = 'xy'; // warning - -#pragma clang system_header - -char b = 'ab'; // no warning -</pre> - -<p>The <tt>-isystem-prefix</tt> and <tt>-ino-system-prefix</tt> command-line -arguments can be used to override whether subsets of an include path are treated -as system headers. When the name in a <tt>#include</tt> directive is found -within a header search path and starts with a system prefix, the header is -treated as a system header. The last prefix on the command-line which matches -the specified header name takes precedence. 
For instance:</p> - -<pre> -clang -Ifoo -isystem bar -isystem-prefix x/ -ino-system-prefix x/y/ -</pre> - -<p>Here, <tt>#include "x/a.h"</tt> is treated as including a system header, even -if the header is found in <tt>foo</tt>, and <tt>#include "x/y/b.h"</tt> is -treated as not including a system header, even if the header is found in -<tt>bar</tt>. -</p> - -<p>A <tt>#include</tt> directive which finds a file relative to the current -directory is treated as including a system header if the including file is -treated as a system header.</p> - -<h4 id="diagnostics_enable_everything">Enabling All Warnings</h4> - -<p>In addition to the traditional <tt>-W</tt> flags, one can enable <b>all</b> - warnings by passing <tt>-Weverything</tt>. - This works as expected with <tt>-Werror</tt>, - and also includes the warnings from <tt>-pedantic</tt>.</p> - -<p>Note that when combined with <tt>-w</tt> (which disables all warnings), that - flag wins.</p> - -<h4 id="analyzer_diagnositics">Controlling Static Analyzer Diagnostics</h4> - -<p>While not strictly part of the compiler, the diagnostics from Clang's <a -href="http://clang-analyzer.llvm.org">static analyzer</a> can also be influenced -by the user via changes to the source code. See the available -<a href = "http://clang-analyzer.llvm.org/annotations.html" >annotations</a> and -the analyzer's -<a href= "http://clang-analyzer.llvm.org/faq.html#exclude_code" >FAQ page</a> for -more information. - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="precompiledheaders">Precompiled Headers</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -<p><a href="http://en.wikipedia.org/wiki/Precompiled_header">Precompiled -headers</a> are a general approach employed by many compilers to reduce -compilation time. The underlying motivation of the approach is that it is -common for the same (and often large) header files to be included by -multiple source files. 
Consequently, compile times can often be greatly improved -by caching some of the (redundant) work done by a compiler to process headers. -Precompiled header files, which represent one of many ways to implement -this optimization, are literally files that represent an on-disk cache that -contains the vital information necessary to reduce some of the work -needed to process a corresponding header file. While details of precompiled -headers vary between compilers, precompiled headers have been shown to be -highly effective at speeding up program compilation on systems with very large -system headers (e.g., Mac OS/X).</p> - -<h4>Generating a PCH File</h4> - -<p>To generate a PCH file using Clang, one invokes Clang with -the <b><tt>-x <i><language></i>-header</tt></b> option. This mirrors the -interface in GCC for generating PCH files:</p> - -<pre> - $ gcc -x c-header test.h -o test.h.gch - $ clang -x c-header test.h -o test.h.pch -</pre> - -<h4>Using a PCH File</h4> - -<p>A PCH file can then be used as a prefix header when a -<b><tt>-include</tt></b> option is passed to <tt>clang</tt>:</p> - -<pre> - $ clang -include test.h test.c -o test -</pre> - -<p>The <tt>clang</tt> driver will first check if a PCH file for <tt>test.h</tt> -is available; if so, the contents of <tt>test.h</tt> (and the files it includes) -will be processed from the PCH file. Otherwise, Clang falls back to -directly processing the content of <tt>test.h</tt>. This mirrors the behavior of -GCC.</p> - -<p><b>NOTE:</b> Clang does <em>not</em> automatically use PCH files -for headers that are directly included within a source file. 
For example:</p> - -<pre> - $ clang -x c-header test.h -o test.h.pch - $ cat test.c - #include "test.h" - $ clang test.c -o test -</pre> - -<p>In this example, <tt>clang</tt> will not automatically use the PCH file for -<tt>test.h</tt> since <tt>test.h</tt> was included directly in the source file -and not specified on the command line using <tt>-include</tt>.</p> - -<h4>Relocatable PCH Files</h4> -<p>It is sometimes necessary to build a precompiled header from headers that -are not yet in their final, installed locations. For example, one might build a -precompiled header within the build tree that is then meant to be installed -alongside the headers. Clang permits the creation of "relocatable" precompiled -headers, which are built with a given path (into the build directory) and can -later be used from an installed location.</p> - -<p>To build a relocatable precompiled header, place your headers into a -subdirectory whose structure mimics the installed location. For example, if you -want to build a precompiled header for the header <code>mylib.h</code> that -will be installed into <code>/usr/include</code>, create a subdirectory -<code>build/usr/include</code> and place the header <code>mylib.h</code> into -that subdirectory. If <code>mylib.h</code> depends on other headers, then -they can be stored within <code>build/usr/include</code> in a way that mimics -the installed location.</p> - -<p>Building a relocatable precompiled header requires two additional arguments. -First, pass the <code>--relocatable-pch</code> flag to indicate that the -resulting PCH file should be relocatable. Second, pass -<code>-isysroot /path/to/build</code>, which makes all includes for your -library relative to the build directory. For example:</p> - -<pre> - # clang -x c-header --relocatable-pch -isysroot /path/to/build /path/to/build/mylib.h mylib.h.pch -</pre> - -<p>When loading the relocatable PCH file, the various headers used in the PCH -file are found from the system header root. 
For example, <code>mylib.h</code> -can be found in <code>/usr/include/mylib.h</code>. If the headers are installed -in some other system root, the <code>-isysroot</code> option can be used provide -a different system root from which the headers will be based. For example, -<code>-isysroot /Developer/SDKs/MacOSX10.4u.sdk</code> will look for -<code>mylib.h</code> in -<code>/Developer/SDKs/MacOSX10.4u.sdk/usr/include/mylib.h</code>.</p> - -<p>Relocatable precompiled headers are intended to be used in a limited number -of cases where the compilation environment is tightly controlled and the -precompiled header cannot be generated after headers have been installed. -Relocatable precompiled headers also have some performance impact, because -the difference in location between the header locations at PCH build time vs. -at the time of PCH use requires one of the PCH optimizations, -<code>stat()</code> caching, to be disabled. However, this change is only -likely to affect PCH files that reference a large number of headers.</p> - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="codegen">Controlling Code Generation</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -<p>Clang provides a number of ways to control code generation. The options are listed below.</p> - -<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --> -<dl> -<dt id="opt_fsanitize"><b>-fsanitize=check1,check2</b>: Turn on runtime checks -for various forms of undefined behavior.</dt> - -<dd>This option controls whether Clang adds runtime checks for various forms of -undefined behavior, and is disabled by default. If a check fails, a diagnostic -message is produced at runtime explaining the problem. 
The main checks are: - -<ul> -<li id="opt_fsanitize_address"><tt>-fsanitize=address</tt>: - <a href="AddressSanitizer.html">AddressSanitizer</a>, a memory error - detector.</li> -<li id="opt_fsanitize_thread"><tt>-fsanitize=thread</tt>: - <a href="ThreadSanitizer.html">ThreadSanitizer</a>, an <em>experimental</em> - data race detector. Not ready for widespread use.</li> -<li id="opt_fsanitize_undefined"><tt>-fsanitize=undefined</tt>: - Enables all the checks listed below.</li> -</ul> - -The following more fine-grained checks are also available: - -<ul> -<li id="opt_fsanitize_alignment"><tt>-fsanitize=alignment</tt>: - Use of a misaligned pointer or creation of a misaligned reference.</li> -<li id="opt_fsanitize_divide-by-zero"><tt>-fsanitize=divide-by-zero</tt>: - Division by zero.</li> -<li id="opt_fsanitize_float-cast-overflow"><tt>-fsanitize=float-cast-overflow</tt>: - Conversion to, from, or between floating-point types which would overflow - the destination.</li> -<li id="opt_fsanitize_null"><tt>-fsanitize=null</tt>: - Use of a null pointer or creation of a null reference.</li> -<li id="opt_fsanitize_object-size"><tt>-fsanitize=object-size</tt>: - An attempt to use bytes which the optimizer can determine are not part of - the object being accessed. - The sizes of objects are determined using <tt>__builtin_object_size</tt>, and - consequently may be able to detect more problems at higher optimization - levels.</li> -<li id="opt_fsanitize_return"><tt>-fsanitize=return</tt>: - In C++, reaching the end of a value-returning function without returning a - value.</li> -<li id="opt_fsanitize_shift"><tt>-fsanitize=shift</tt>: - Shift operators where the amount shifted is greater or equal to the - promoted bit-width of the left hand side or less than zero, or where - the left hand side is negative. 
For a signed left shift, also checks - for signed overflow in C, and for unsigned overflow in C++.</li> -<li id="opt_fsanitize_signed-integer-overflow"><tt>-fsanitize=signed-integer-overflow</tt>: - Signed integer overflow, including all the checks added by <tt>-ftrapv</tt>, - and checking for overflow in signed division (<tt>INT_MIN / -1</tt>).</li> -<li id="opt_fsanitize_unreachable"><tt>-fsanitize=unreachable</tt>: - If control flow reaches __builtin_unreachable.</li> -<li id="opt_fsanitize_vla-bound"><tt>-fsanitize=vla-bound</tt>: - A variable-length array whose bound does not evaluate to a positive value.</li> -<li id="opt_fsanitize_vptr"><tt>-fsanitize=vptr</tt>: - Use of an object whose vptr indicates that it is of the wrong dynamic type, - or that its lifetime has not begun or has ended. Incompatible with - <tt>-fno-rtti</tt>.</li> -</ul> - -The <tt>-fsanitize=</tt> argument must also be provided when linking, in order -to link to the appropriate runtime library. It is not possible to combine the -<tt>-fsanitize=address</tt> and <tt>-fsanitize=thread</tt> checkers in the same -program. -</dd> - -<dt id="opt_faddress-sanitizer"><b>-f[no-]address-sanitizer</b>: -Deprecated synonym for <a href="#opt_fsanitize_address"><tt>-f[no-]sanitize=address</tt></a>. - -<dt id="opt_fthread-sanitizer"><b>-f[no-]thread-sanitizer</b>: -Deprecated synonym for <a href="#opt_fsanitize_address"><tt>-f[no-]sanitize=thread</tt></a>. - -<dt id="opt_fcatch-undefined-behavior"><b>-fcatch-undefined-behavior</b>: -Deprecated synonym for <a href="#opt_fsanitize_undefined"><tt>-fsanitize=undefined</tt></a>. 
- -<dt id="opt_fno-assume-sane-operator-new"><b>-fno-assume-sane-operator-new</b>: -Don't assume that the C++'s new operator is sane.</dt> -<dd>This option tells the compiler to do not assume that C++'s global new -operator will always return a pointer that does not -alias any other pointer when the function returns.</dd> - -<dt id="opt_ftrap-function"><b>-ftrap-function=[name]</b>: Instruct code -generator to emit a function call to the specified function name for -<tt>__builtin_trap()</tt>.</dt> - -<dd>LLVM code generator translates <tt>__builtin_trap()</tt> to a trap -instruction if it is supported by the target ISA. Otherwise, the builtin is -translated into a call to <tt>abort</tt>. If this option is set, then the code -generator will always lower the builtin to a call to the specified function -regardless of whether the target ISA has a trap instruction. This option is -useful for environments (e.g. deeply embedded) where a trap cannot be properly -handled, or when some custom behavior is desired.</dd> - -<dt id="opt_ftls-model"><b>-ftls-model=[model]</b>: Select which TLS model to -use.</dt> -<dd>Valid values are: <tt>global-dynamic</tt>, <tt>local-dynamic</tt>, -<tt>initial-exec</tt> and <tt>local-exec</tt>. The default value is -<tt>global-dynamic</tt>. The compiler may use a different model if the selected -model is not supported by the target, or if a more efficient model can be used. -The TLS model can be overridden per variable using the <tt>tls_model</tt> -attribute. -</dd> -</dl> - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="debuginfosize">Controlling Size of Debug Information</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -<p>Debug info kind generated by Clang can be set by one of the flags listed -below. 
If multiple flags are present, the last one is used.</p> - -<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --> -<dl> -<dt id="opt_g0"><b>-g0</b>: Don't generate any debug info (default). - -<dt id="opt_gline-tables-only"><b>-gline-tables-only</b>: -Generate line number tables only. -<dd> -This kind of debug info allows to obtain stack traces with function -names, file names and line numbers (by such tools as -gdb or addr2line). It doesn't contain any other data (e.g. -description of local variables or function parameters). -</dd> - -<dt id="opt_g"><b>-g</b>: Generate complete debug info. -</dl> - -<!-- ======================================================================= --> -<h2 id="c">C Language Features</h2> -<!-- ======================================================================= --> - -<p>The support for standard C in clang is feature-complete except for the C99 -floating-point pragmas.</p> - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="c_ext">Extensions supported by clang</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -<p>See <a href="LanguageExtensions.html">clang language extensions</a>.</p> - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="c_modes">Differences between various standard modes</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -<p>clang supports the -std option, which changes what language mode clang uses. -The supported modes for C are c89, gnu89, c94, c99, gnu99 and various aliases -for those modes. If no -std option is specified, clang defaults to gnu99 mode. 
-</p> - -<p>Differences between all c* and gnu* modes:</p> -<ul> -<li>c* modes define "__STRICT_ANSI__".</li> -<li>Target-specific defines not prefixed by underscores, like "linux", are -defined in gnu* modes.</li> -<li>Trigraphs default to being off in gnu* modes; they can be enabled by the --trigraphs option.</li> -<li>The parser recognizes "asm" and "typeof" as keywords in gnu* modes; the -variants "__asm__" and "__typeof__" are recognized in all modes.</li> -<li>The Apple "blocks" extension is recognized by default in gnu* modes -on some platforms; it can be enabled in any mode with the "-fblocks" -option.</li> -<li>Arrays that are VLA's according to the standard, but which can be constant - folded by the frontend are treated as fixed size arrays. This occurs for - things like "int X[(1, 2)];", which is technically a VLA. c* modes are - strictly compliant and treat these as VLAs.</li> -</ul> - -<p>Differences between *89 and *99 modes:</p> -<ul> -<li>The *99 modes default to implementing "inline" as specified in C99, while -the *89 modes implement the GNU version. This can be overridden for individual -functions with the __gnu_inline__ attribute.</li> -<li>Digraphs are not recognized in c89 mode.</li> -<li>The scope of names defined inside a "for", "if", "switch", "while", or "do" -statement is different. 
(example: "if ((struct x {int x;}*)0) {}".)</li> -<li>__STDC_VERSION__ is not defined in *89 modes.</li> -<li>"inline" is not recognized as a keyword in c89 mode.</li> -<li>"restrict" is not recognized as a keyword in *89 modes.</li> -<li>Commas are allowed in integer constant expressions in *99 modes.</li> -<li>Arrays which are not lvalues are not implicitly promoted to pointers in -*89 modes.</li> -<li>Some warnings are different.</li> -</ul> - -<p>c94 mode is identical to c89 mode except that digraphs are enabled in -c94 mode (FIXME: And __STDC_VERSION__ should be defined!).</p> - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="c_unimpl_gcc">GCC extensions not implemented yet</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -<p>clang tries to be compatible with gcc as much as possible, but some gcc -extensions are not implemented yet:</p> - -<ul> - -<li>clang does not support #pragma weak -(<a href="http://llvm.org/bugs/show_bug.cgi?id=3679">bug 3679</a>). Due to -the uses described in the bug, this is likely to be implemented at some -point, at least partially.</li> - -<li>clang does not support decimal floating point types (_Decimal32 and -friends) or fixed-point types (_Fract and friends); nobody has expressed -interest in these features yet, so it's hard to say when they will be -implemented.</li> - -<li>clang does not support nested functions; this is a complex feature which -is infrequently used, so it is unlikely to be implemented anytime soon. In C++11 -it can be emulated by assigning lambda functions to local variables, e.g: -<pre> - auto const local_function = [&](int parameter) { - // Do something - }; - ... - local_function(1); -</pre> -</li> - -<li>clang does not support global register variables; this is unlikely -to be implemented soon because it requires additional LLVM backend support. 
-</li> - -<li>clang does not support static initialization of flexible array -members. This appears to be a rarely used extension, but could be -implemented pending user demand.</li> - -<li>clang does not support __builtin_va_arg_pack/__builtin_va_arg_pack_len. -This is used rarely, but in some potentially interesting places, like the -glibc headers, so it may be implemented pending user demand. Note that -because clang pretends to be like GCC 4.2, and this extension was introduced -in 4.3, the glibc headers will not try to use this extension with clang at -the moment.</li> - -<li>clang does not support the gcc extension for forward-declaring function -parameters; this has not shown up in any real-world code yet, though, so it -might never be implemented.</li> - -</ul> - -<p>This is not a complete list; if you find an unsupported extension -missing from this list, please send an e-mail to cfe-dev. This list -currently excludes C++; see <a href="#cxx">C++ Language Features</a>. -Also, this list does not include bugs in mostly-implemented features; please -see the <a href="http://llvm.org/bugs/buglist.cgi?quicksearch=product%3Aclang+component%3A-New%2BBugs%2CAST%2CBasic%2CDriver%2CHeaders%2CLLVM%2BCodeGen%2Cparser%2Cpreprocessor%2CSemantic%2BAnalyzer"> -bug tracker</a> for known existing bugs (FIXME: Is there a section for -bug-reporting guidelines somewhere?).</p> - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="c_unsupp_gcc">Intentionally unsupported GCC extensions</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -<ul> - -<li>clang does not support the gcc extension that allows variable-length arrays -in structures. This is for a few reasons: one, it is tricky -to implement, two, the extension is completely undocumented, and three, the -extension appears to be rarely used. 
Note that clang <em>does</em> support -flexible array members (arrays with a zero or unspecified size at the end of -a structure).</li> - -<li>clang does not have an equivalent to gcc's "fold"; this means that -clang doesn't accept some constructs gcc might accept in contexts where a -constant expression is required, like "x-x" where x is a variable.</li> - -<li>clang does not support __builtin_apply and friends; this extension is -extremely obscure and difficult to implement reliably.</li> - -</ul> - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="c_ms">Microsoft extensions</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -<p>clang has some experimental support for extensions from -Microsoft Visual C++; to enable it, use the -fms-extensions command-line -option. This is the default for Windows targets. Note that the -support is incomplete; enabling Microsoft extensions will silently drop -certain constructs (including __declspec and Microsoft-style asm statements). -</p> - -<p>clang has a -fms-compatibility flag that makes clang accept enough -invalid C++ to be able to parse most Microsoft headers. This flag is enabled by -default for Windows targets.</p> - -<p>-fdelayed-template-parsing lets clang delay all template instantiation until -the end of a translation unit. This flag is enabled by default for Windows -targets.</p> - -<ul> -<li>clang allows setting _MSC_VER with -fmsc-version=. It defaults to 1300 which -is the same as Visual C/C++ 2003. Any number is supported and can greatly affect -what Windows SDK and c++stdlib headers clang can compile. 
This option will be -removed when clang supports the full set of MS extensions required for these -headers.</li> - -<li>clang does not support the Microsoft extension where anonymous -record members can be declared using user defined typedefs.</li> - -<li>clang supports the Microsoft "#pragma pack" feature for -controlling record layout. GCC also contains support for this feature, -however where MSVC and GCC are incompatible clang follows the MSVC -definition.</li> - -<li>clang defaults to C++11 for Windows targets.</li> -</ul> - -<!-- ======================================================================= --> -<h2 id="cxx">C++ Language Features</h2> -<!-- ======================================================================= --> - -<p>clang fully implements all of standard C++98 except for exported templates -(which were removed in C++11), and -<a href="http://clang.llvm.org/cxx_status.html">many C++11 features</a> are also -implemented.</p> - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="cxx_implimits">Controlling implementation limits</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -<p><b>-fconstexpr-depth=N</b>: Sets the limit for recursive constexpr function -invocations to N. The default is 512.</p> - -<p><b>-ftemplate-depth=N</b>: Sets the limit for recursively nested template -instantiations to N. 
The default is 1024.</p> - -<!-- ======================================================================= --> -<h2 id="target_features">Target-Specific Features and Limitations</h2> -<!-- ======================================================================= --> - - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="target_arch">CPU Architectures Features and Limitations</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -<!-- ======================== --> -<h4 id="target_arch_x86">X86</h4> -<!-- ======================== --> - -<p>The support for X86 (both 32-bit and 64-bit) is considered stable on Darwin -(Mac OS/X), Linux, FreeBSD, and Dragonfly BSD: it has been tested to correctly -compile many large C, C++, Objective-C, and Objective-C++ codebases.</p> - -<p>On x86_64-mingw32, passing i128(by value) is incompatible to Microsoft x64 -calling conversion. You might need to tweak WinX86_64ABIInfo::classify() -in lib/CodeGen/TargetInfo.cpp.</p> - -<!-- ======================== --> -<h4 id="target_arch_arm">ARM</h4> -<!-- ======================== --> - -<p>The support for ARM (specifically ARMv6 and ARMv7) is considered stable on -Darwin (iOS): it has been tested to correctly compile many large C, C++, -Objective-C, and Objective-C++ codebases. Clang only supports a limited number -of ARM architectures. It does not yet fully support ARMv5, for example.</p> - -<!-- ======================== --> -<h4 id="target_arch_other">Other platforms</h4> -<!-- ======================== --> -clang currently contains some support for PPC and Sparc; however, significant -pieces of code generation are still missing, and they haven't undergone -significant testing. - -<p>clang contains limited support for the MSP430 embedded processor, but both -the clang support and the LLVM backend support are highly experimental. - -<p>Other platforms are completely unsupported at the moment. 
Adding the -minimal support needed for parsing and semantic analysis on a new platform -is quite easy; see lib/Basic/Targets.cpp in the clang source tree. This level -of support is also sufficient for conversion to LLVM IR for simple programs. -Proper support for conversion to LLVM IR requires adding code to -lib/CodeGen/CGCall.cpp at the moment; this is likely to change soon, though. -Generating assembly requires a suitable LLVM backend. - -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> -<h3 id="target_os">Operating System Features and Limitations</h3> -<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = --> - -<!-- ======================================= --> -<h4 id="target_os_darwin">Darwin (Mac OS/X)</h4> -<!-- ======================================= --> - -<p>None</p> - -<!-- ======================================= --> -<h4 id="target_os_win32">Windows</h4> -<!-- ======================================= --> - -<p>Experimental supports are on Cygming.</p> - -<p>See also <a href="#c_ms">Microsoft Extensions</a>.</p> - -<h5>Cygwin</h5> - -<p>Clang works on Cygwin-1.7.</p> - -<h5>MinGW32</h5> - -<p>Clang works on some mingw32 distributions. 
-Clang assumes directories as below;</p> - -<ul> -<li><tt>C:/mingw/include</tt></li> -<li><tt>C:/mingw/lib</tt></li> -<li><tt>C:/mingw/lib/gcc/mingw32/4.[3-5].0/include/c++</tt></li> -</ul> - -<p>On MSYS, a few tests might fail.</p> - -<h5>MinGW-w64</h5> - -<p>For 32-bit (i686-w64-mingw32), and 64-bit (x86_64-w64-mingw32), Clang assumes as below;<p> - -<ul> -<li><tt>GCC versions 4.5.0 to 4.5.3, 4.6.0 to 4.6.2, or 4.7.0 (for the C++ header search path)</tt></li> -<li><tt>some_directory/bin/gcc.exe</tt></li> -<li><tt>some_directory/bin/clang.exe</tt></li> -<li><tt>some_directory/bin/clang++.exe</tt></li> -<li><tt>some_directory/bin/../include/c++/GCC_version</tt></li> -<li><tt>some_directory/bin/../include/c++/GCC_version/x86_64-w64-mingw32</tt></li> -<li><tt>some_directory/bin/../include/c++/GCC_version/i686-w64-mingw32</tt></li> -<li><tt>some_directory/bin/../include/c++/GCC_version/backward</tt></li> -<li><tt>some_directory/bin/../x86_64-w64-mingw32/include</tt></li> -<li><tt>some_directory/bin/../i686-w64-mingw32/include</tt></li> -<li><tt>some_directory/bin/../include</tt></li> -</ul> - -<p>This directory layout is standard for any toolchain you will find on the official <a href="http://mingw-w64.sourceforge.net">MinGW-w64 website</a>. - -<p>Clang expects the GCC executable "gcc.exe" compiled for i686-w64-mingw32 (or x86_64-w64-mingw32) to be present on PATH.</p> - -<p><a href="http://llvm.org/bugs/show_bug.cgi?id=9072">Some tests might fail</a> -on x86_64-w64-mingw32.</p> - -</div> -</body> -</html> diff --git a/docs/UsersManual.rst b/docs/UsersManual.rst new file mode 100644 index 0000000..6cc8361 --- /dev/null +++ b/docs/UsersManual.rst @@ -0,0 +1,1313 @@ +============================ +Clang Compiler User's Manual +============================ + +.. contents:: + :local: + +Introduction +============ + +The Clang Compiler is an open-source compiler for the C family of +programming languages, aiming to be the best in class implementation of +these languages. 
Clang builds on the LLVM optimizer and code generator, +allowing it to provide high-quality optimization and code generation +support for many targets. For more general information, please see the +`Clang Web Site <http://clang.llvm.org>`_ or the `LLVM Web +Site <http://llvm.org>`_. + +This document describes important notes about using Clang as a compiler +for an end-user, documenting the supported features, command line +options, etc. If you are interested in using Clang to build a tool that +processes code, please see :doc:`InternalsManual`. If you are interested in the +`Clang Static Analyzer <http://clang-analyzer.llvm.org>`_, please see its web +page. + +Clang is designed to support the C family of programming languages, +which includes :ref:`C <c>`, :ref:`Objective-C <objc>`, :ref:`C++ <cxx>`, and +:ref:`Objective-C++ <objcxx>` as well as many dialects of those. For +language-specific information, please see the corresponding language +specific section: + +- :ref:`C Language <c>`: K&R C, ANSI C89, ISO C90, ISO C94 (C89+AMD1), ISO + C99 (+TC1, TC2, TC3). +- :ref:`Objective-C Language <objc>`: ObjC 1, ObjC 2, ObjC 2.1, plus + variants depending on base language. +- :ref:`C++ Language <cxx>` +- :ref:`Objective C++ Language <objcxx>` + +In addition to these base languages and their dialects, Clang supports a +broad variety of language extensions, which are documented in the +corresponding language section. These extensions are provided to be +compatible with the GCC, Microsoft, and other popular compilers as well +as to improve functionality through Clang-specific features. The Clang +driver and language features are intentionally designed to be as +compatible with the GNU GCC compiler as reasonably possible, easing +migration from GCC to Clang. In most cases, code "just works". + +In addition to language specific features, Clang has a variety of +features that depend on what CPU architecture or operating system is +being compiled for. 
Please see the :ref:`Target-Specific Features and +Limitations <target_features>` section for more details. + +The rest of the introduction introduces some basic :ref:`compiler +terminology <terminology>` that is used throughout this manual and +contains a basic :ref:`introduction to using Clang <basicusage>` as a +command line compiler. + +.. _terminology: + +Terminology +----------- + +Front end, parser, backend, preprocessor, undefined behavior, +diagnostic, optimizer + +.. _basicusage: + +Basic Usage +----------- + +Intro to how to use a C compiler for newbies. + +compile + link compile then link debug info enabling optimizations +picking a language to use, defaults to C99 by default. Autosenses based +on extension. using a makefile + +Command Line Options +==================== + +This section is generally an index into other sections. It does not go +into depth on the ones that are covered by other sections. However, the +first part introduces the language selection and other high level +options like :option:`-c`, :option:`-g`, etc. + +Options to Control Error and Warning Messages +--------------------------------------------- + +.. option:: -Werror + + Turn warnings into errors. + +.. This is in plain monospaced font because it generates the same label as +.. -Werror, and Sphinx complains. + +``-Werror=foo`` + + Turn warning "foo" into an error. + +.. option:: -Wno-error=foo + + Turn warning "foo" into a warning even if :option:`-Werror` is specified. + +.. option:: -Wfoo + + Enable warning "foo". + +.. option:: -Wno-foo + + Disable warning "foo". + +.. option:: -w + + Disable all warnings. + +.. option:: -Weverything + + :ref:`Enable all warnings. <diagnostics_enable_everything>` + +.. option:: -pedantic + + Warn on language extensions. + +.. option:: -pedantic-errors + + Error on language extensions. + +.. option:: -Wsystem-headers + + Enable warnings from system headers. + +.. 
option:: -ferror-limit=123 + + Stop emitting diagnostics after 123 errors have been produced. The default is + 20, and the error limit can be disabled with :option:`-ferror-limit=0`. + +.. option:: -ftemplate-backtrace-limit=123 + + Only emit up to 123 template instantiation notes within the template + instantiation backtrace for a single warning or error. The default is 10, and + the limit can be disabled with :option:`-ftemplate-backtrace-limit=0`. + +.. _cl_diag_formatting: + +Formatting of Diagnostics +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Clang aims to produce beautiful diagnostics by default, particularly for +new users that first come to Clang. However, different people have +different preferences, and sometimes Clang is driven by another program +that wants to parse simple and consistent output, not a person. For +these cases, Clang provides a wide range of options to control the exact +output format of the diagnostics that it generates. + +.. _opt_fshow-column: + +**-f[no-]show-column** + Print column number in diagnostic. + + This option, which defaults to on, controls whether or not Clang + prints the column number of a diagnostic. For example, when this is + enabled, Clang will print something like: + + :: + + test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] + #endif bad + ^ + // + + When this is disabled, Clang will print "test.c:28: warning..." with + no column number. + + The printed column numbers count bytes from the beginning of the + line; take care if your source contains multibyte characters. + +.. _opt_fshow-source-location: + +**-f[no-]show-source-location** + Print source file/line/column information in diagnostic. + + This option, which defaults to on, controls whether or not Clang + prints the filename, line number and column number of a diagnostic. 
+ For example, when this is enabled, Clang will print something like: + + :: + + test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] + #endif bad + ^ + // + + When this is disabled, Clang will not print the "test.c:28:8: " + part. + +.. _opt_fcaret-diagnostics: + +**-f[no-]caret-diagnostics** + Print source line and ranges from source code in diagnostic. + This option, which defaults to on, controls whether or not Clang + prints the source line, source ranges, and caret when emitting a + diagnostic. For example, when this is enabled, Clang will print + something like: + + :: + + test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] + #endif bad + ^ + // + +**-f[no-]color-diagnostics** + This option, which defaults to on when a color-capable terminal is + detected, controls whether or not Clang prints diagnostics in color. + + When this option is enabled, Clang will use colors to highlight + specific parts of the diagnostic, e.g., + + .. nasty hack to not lose our dignity + + .. raw:: html + + <pre> + <b><span style="color:black">test.c:28:8: <span style="color:magenta">warning</span>: extra tokens at end of #endif directive [-Wextra-tokens]</span></b> + #endif bad + <span style="color:green">^</span> + <span style="color:green">//</span> + </pre> + + When this is disabled, Clang will just print: + + :: + + test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] + #endif bad + ^ + // + +.. option:: -fdiagnostics-format=clang/msvc/vi + + Changes diagnostic output format to better match IDEs and command line tools. + + This option controls the output format of the filename, line number, + and column printed in diagnostic messages. 
The options, and their + effect on formatting a simple conversion diagnostic, follow: + + **clang** (default) + :: + + t.c:3:11: warning: conversion specifies type 'char *' but the argument has type 'int' + + **msvc** + :: + + t.c(3,11) : warning: conversion specifies type 'char *' but the argument has type 'int' + + **vi** + :: + + t.c +3:11: warning: conversion specifies type 'char *' but the argument has type 'int' + +**-f[no-]diagnostics-show-name** + Enable the display of the diagnostic name. + This option, which defaults to off, controls whether or not Clang + prints the associated name. + +.. _opt_fdiagnostics-show-option: + +**-f[no-]diagnostics-show-option** + Enable ``[-Woption]`` information in diagnostic line. + + This option, which defaults to on, controls whether or not Clang + prints the associated :ref:`warning group <cl_diag_warning_groups>` + option name when outputting a warning diagnostic. For example, in + this output: + + :: + + test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] + #endif bad + ^ + // + + Passing **-fno-diagnostics-show-option** will prevent Clang from + printing the [:ref:`-Wextra-tokens <opt_Wextra-tokens>`] information in + the diagnostic. This information tells you the flag needed to enable + or disable the diagnostic, either from the command line or through + :ref:`#pragma GCC diagnostic <pragma_GCC_diagnostic>`. + +.. _opt_fdiagnostics-show-category: + +.. option:: -fdiagnostics-show-category=none/id/name + + Enable printing category information in diagnostic line. + + This option, which defaults to "none", controls whether or not Clang + prints the category associated with a diagnostic when emitting it. + Each diagnostic may or may not have an associated category; if it + has one, it is listed in the diagnostic categorization field of the + diagnostic line (in the []'s). 
+ + For example, a format string warning will produce these three + renditions based on the setting of this option: + + :: + + t.c:3:11: warning: conversion specifies type 'char *' but the argument has type 'int' [-Wformat] + t.c:3:11: warning: conversion specifies type 'char *' but the argument has type 'int' [-Wformat,1] + t.c:3:11: warning: conversion specifies type 'char *' but the argument has type 'int' [-Wformat,Format String] + + This category can be used by clients that want to group diagnostics + by category, so it should be a high level category. We want dozens + of these, not hundreds or thousands of them. + +.. _opt_fdiagnostics-fixit-info: + +**-f[no-]diagnostics-fixit-info** + Enable "FixIt" information in the diagnostics output. + + This option, which defaults to on, controls whether or not Clang + prints the information on how to fix a specific diagnostic + underneath it when it knows. For example, in this output: + + :: + + test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] + #endif bad + ^ + // + + Passing **-fno-diagnostics-fixit-info** will prevent Clang from + printing the "//" line at the end of the message. This information + is useful for users who may not understand what is wrong, but can be + confusing for machine parsing. + +.. _opt_fdiagnostics-print-source-range-info: + +**-fdiagnostics-print-source-range-info** + Print machine parsable information about source ranges. + This option makes Clang print information about source ranges in a machine + parsable format after the file/line/column number information. The + information is a simple sequence of brace enclosed ranges, where each range + lists the start and end line/column locations. For example, in this output: + + :: + + exprs.c:47:15:{47:8-47:14}{47:17-47:24}: error: invalid operands to binary expression ('int *' and '_Complex float') + P = (P-42) + Gamma*4; + ~~~~~~ ^ ~~~~~~~ + + The {}'s are generated by -fdiagnostics-print-source-range-info. 
+ + The printed column numbers count bytes from the beginning of the + line; take care if your source contains multibyte characters. + +.. option:: -fdiagnostics-parseable-fixits + + Print Fix-Its in a machine parseable form. + + This option makes Clang print available Fix-Its in a machine + parseable format at the end of diagnostics. The following example + illustrates the format: + + :: + + fix-it:"t.cpp":{7:25-7:29}:"Gamma" + + The range printed is a half-open range, so in this example the + characters at column 25 up to but not including column 29 on line 7 + in t.cpp should be replaced with the string "Gamma". Either the + range or the replacement string may be empty (representing strict + insertions and strict erasures, respectively). Both the file name + and the insertion string escape backslash (as "\\\\"), tabs (as + "\\t"), newlines (as "\\n"), double quotes(as "\\"") and + non-printable characters (as octal "\\xxx"). + + The printed column numbers count bytes from the beginning of the + line; take care if your source contains multibyte characters. + +.. option:: -fno-elide-type + + Turns off elision in template type printing. + + The default for template type printing is to elide as many template + arguments as possible, removing those which are the same in both + template types, leaving only the differences. Adding this flag will + print all the template arguments. If supported by the terminal, + highlighting will still appear on differing arguments. + + Default: + + :: + + t.cc:4:5: note: candidate function not viable: no known conversion from 'vector<map<[...], map<float, [...]>>>' to 'vector<map<[...], map<double, [...]>>>' for 1st argument; + + -fno-elide-type: + + :: + + t.cc:4:5: note: candidate function not viable: no known conversion from 'vector<map<int, map<float, int>>>' to 'vector<map<int, map<double, int>>>' for 1st argument; + +.. option:: -fdiagnostics-show-template-tree + + Template type diffing prints a text tree. 
+ + For diffing large templated types, this option will cause Clang to + display the templates as an indented text tree, one argument per + line, with differences marked inline. This is compatible with + -fno-elide-type. + + Default: + + :: + + t.cc:4:5: note: candidate function not viable: no known conversion from 'vector<map<[...], map<float, [...]>>>' to 'vector<map<[...], map<double, [...]>>>' for 1st argument; + + With :option:`-fdiagnostics-show-template-tree`: + + :: + + t.cc:4:5: note: candidate function not viable: no known conversion for 1st argument; + vector< + map< + [...], + map< + [float != double], + [...]>>> + +.. _cl_diag_warning_groups: + +Individual Warning Groups +^^^^^^^^^^^^^^^^^^^^^^^^^ + +TODO: Generate this from tblgen. Define one anchor per warning group. + +.. _opt_wextra-tokens: + +.. option:: -Wextra-tokens + + Warn about excess tokens at the end of a preprocessor directive. + + This option, which defaults to on, enables warnings about extra + tokens at the end of preprocessor directives. For example: + + :: + + test.c:28:8: warning: extra tokens at end of #endif directive [-Wextra-tokens] + #endif bad + ^ + + These extra tokens are not strictly conforming, and are usually best + handled by commenting them out. + +.. option:: -Wambiguous-member-template + + Warn about unqualified uses of a member template whose name resolves to + another template at the location of the use. + + This option, which defaults to on, enables a warning in the + following code: + + :: + + template<typename T> struct set{}; + template<typename T> struct trait { typedef const T& type; }; + struct Value { + template<typename T> void set(typename trait<T>::type value) {} + }; + void foo() { + Value v; + v.set<double>(3.2); + } + + C++ [basic.lookup.classref] requires this to be an error, but, + because it's hard to work around, Clang downgrades it to a warning + as an extension. + +.. 
option:: -Wbind-to-temporary-copy + + Warn about an unusable copy constructor when binding a reference to a + temporary. + + This option, which defaults to on, enables warnings about binding a + reference to a temporary when the temporary doesn't have a usable + copy constructor. For example: + + :: + + struct NonCopyable { + NonCopyable(); + private: + NonCopyable(const NonCopyable&); + }; + void foo(const NonCopyable&); + void bar() { + foo(NonCopyable()); // Disallowed in C++98; allowed in C++11. + } + + :: + + struct NonCopyable2 { + NonCopyable2(); + NonCopyable2(NonCopyable2&); + }; + void foo(const NonCopyable2&); + void bar() { + foo(NonCopyable2()); // Disallowed in C++98; allowed in C++11. + } + + Note that if ``NonCopyable2::NonCopyable2()`` has a default argument + whose instantiation produces a compile error, that error will still + be a hard error in C++98 mode even if this warning is turned off. + +Options to Control Clang Crash Diagnostics +------------------------------------------ + +As unbelievable as it may sound, Clang does crash from time to time. +Generally, this only occurs to those living on the `bleeding +edge <http://llvm.org/releases/download.html#svn>`_. Clang goes to great +lengths to assist you in filing a bug report. Specifically, Clang +generates preprocessed source file(s) and associated run script(s) upon +a crash. These files should be attached to a bug report to ease +reproducibility of the failure. Below are the command line options to +control the crash diagnostics. + +.. option:: -fno-crash-diagnostics + + Disable auto-generation of preprocessed source files during a clang crash. + +The -fno-crash-diagnostics flag can be helpful for speeding the process +of generating a delta reduced test case. 
+ +Language and Target-Independent Features +======================================== + +Controlling Errors and Warnings +------------------------------- + +Clang provides a number of ways to control which code constructs cause +it to emit errors and warning messages, and how they are displayed to +the console. + +Controlling How Clang Displays Diagnostics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When Clang emits a diagnostic, it includes rich information in the +output, and gives you fine-grain control over which information is +printed. Clang has the ability to print this information, and these are +the options that control it: + +#. A file/line/column indicator that shows exactly where the diagnostic + occurs in your code [:ref:`-fshow-column <opt_fshow-column>`, + :ref:`-fshow-source-location <opt_fshow-source-location>`]. +#. A categorization of the diagnostic as a note, warning, error, or + fatal error. +#. A text string that describes what the problem is. +#. An option that indicates how to control the diagnostic (for + diagnostics that support it) + [:ref:`-fdiagnostics-show-option <opt_fdiagnostics-show-option>`]. +#. A :ref:`high-level category <diagnostics_categories>` for the diagnostic + for clients that want to group diagnostics by class (for diagnostics + that support it) + [:ref:`-fdiagnostics-show-category <opt_fdiagnostics-show-category>`]. +#. The line of source code that the issue occurs on, along with a caret + and ranges that indicate the important locations + [:ref:`-fcaret-diagnostics <opt_fcaret-diagnostics>`]. +#. "FixIt" information, which is a concise explanation of how to fix the + problem (when Clang is certain it knows) + [:ref:`-fdiagnostics-fixit-info <opt_fdiagnostics-fixit-info>`]. +#. A machine-parsable representation of the ranges involved (off by + default) + [:ref:`-fdiagnostics-print-source-range-info <opt_fdiagnostics-print-source-range-info>`]. 
+ +For more information please see :ref:`Formatting of +Diagnostics <cl_diag_formatting>`. + +Diagnostic Mappings +^^^^^^^^^^^^^^^^^^^ + +All diagnostics are mapped into one of these 5 classes: + +- Ignored +- Note +- Warning +- Error +- Fatal + +.. _diagnostics_categories: + +Diagnostic Categories +^^^^^^^^^^^^^^^^^^^^^ + +Though not shown by default, diagnostics may each be associated with a +high-level category. This category is intended to make it possible to +triage builds that produce a large number of errors or warnings in a +grouped way. + +Categories are not shown by default, but they can be turned on with the +:ref:`-fdiagnostics-show-category <opt_fdiagnostics-show-category>` option. +When set to "``name``", the category is printed textually in the +diagnostic output. When it is set to "``id``", a category number is +printed. The mapping of category names to category id's can be obtained +by running '``clang --print-diagnostic-categories``'. + +Controlling Diagnostics via Command Line Flags +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +TODO: -W flags, -pedantic, etc + +.. _pragma_gcc_diagnostic: + +Controlling Diagnostics via Pragmas +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Clang can also control what diagnostics are enabled through the use of +pragmas in the source code. This is useful for turning off specific +warnings in a section of source code. Clang supports GCC's pragma for +compatibility with existing source code, as well as several extensions. + +The pragma may control any warning that can be used from the command +line. Warnings may be set to ignored, warning, error, or fatal. The +following example code will tell Clang or GCC to ignore the -Wall +warnings: + +.. code-block:: c + + #pragma GCC diagnostic ignored "-Wall" + +In addition to all of the functionality provided by GCC's pragma, Clang +also allows you to push and pop the current warning state. 
This is +particularly useful when writing a header file that will be compiled by +other people, because you don't know what warning flags they build with. + +In the below example :option:`-Wmultichar` is ignored for only a single line of +code, after which the diagnostics return to whatever state had previously +existed. + +.. code-block:: c + + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wmultichar" + + char b = 'df'; // no warning. + + #pragma clang diagnostic pop + +The push and pop pragmas will save and restore the full diagnostic state +of the compiler, regardless of how it was set. That means that it is +possible to use push and pop around GCC compatible diagnostics and Clang +will push and pop them appropriately, while GCC will ignore the pushes +and pops as unknown pragmas. It should be noted that while Clang +supports the GCC pragma, Clang and GCC do not support the exact same set +of warnings, so even when using GCC compatible #pragmas there is no +guarantee that they will have identical behaviour on both compilers. + +Controlling Diagnostics in System Headers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Warnings are suppressed when they occur in system headers. By default, +an included file is treated as a system header if it is found in an +include path specified by ``-isystem``, but this can be overridden in +several ways. + +The ``system_header`` pragma can be used to mark the current file as +being a system header. No warnings will be produced from the location of +the pragma onwards within the same file. + +.. code-block:: c + + char a = 'xy'; // warning + + #pragma clang system_header + + char b = 'ab'; // no warning + +The :option:`-isystem-prefix` and :option:`-ino-system-prefix` command-line +arguments can be used to override whether subsets of an include path are +treated as system headers. 
When the name in a ``#include`` directive is +found within a header search path and starts with a system prefix, the +header is treated as a system header. The last prefix on the +command-line which matches the specified header name takes precedence. +For instance: + +.. code-block:: console + + $ clang -Ifoo -isystem bar -isystem-prefix x/ -ino-system-prefix x/y/ + +Here, ``#include "x/a.h"`` is treated as including a system header, even +if the header is found in ``foo``, and ``#include "x/y/b.h"`` is treated +as not including a system header, even if the header is found in +``bar``. + +A ``#include`` directive which finds a file relative to the current +directory is treated as including a system header if the including file +is treated as a system header. + +.. _diagnostics_enable_everything: + +Enabling All Warnings +^^^^^^^^^^^^^^^^^^^^^ + +In addition to the traditional ``-W`` flags, one can enable **all** +warnings by passing :option:`-Weverything`. This works as expected with +:option:`-Werror`, and also includes the warnings from :option:`-pedantic`. + +Note that when combined with :option:`-w` (which disables all warnings), that +flag wins. + +Controlling Static Analyzer Diagnostics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +While not strictly part of the compiler, the diagnostics from Clang's +`static analyzer <http://clang-analyzer.llvm.org>`_ can also be +influenced by the user via changes to the source code. See the available +`annotations <http://clang-analyzer.llvm.org/annotations.html>`_ and the +analyzer's `FAQ +page <http://clang-analyzer.llvm.org/faq.html#exclude_code>`_ for more +information. + +.. _usersmanual-precompiled-headers: + +Precompiled Headers +------------------- + +`Precompiled headers <http://en.wikipedia.org/wiki/Precompiled_header>`__ +are a general approach employed by many compilers to reduce compilation +time. 
The underlying motivation of the approach is that it is common for +the same (and often large) header files to be included by multiple +source files. Consequently, compile times can often be greatly improved +by caching some of the (redundant) work done by a compiler to process +headers. Precompiled header files, which represent one of many ways to +implement this optimization, are literally files that represent an +on-disk cache that contains the vital information necessary to reduce +some of the work needed to process a corresponding header file. While +details of precompiled headers vary between compilers, precompiled +headers have been shown to be highly effective at speeding up program +compilation on systems with very large system headers (e.g., Mac OS/X). + +Generating a PCH File +^^^^^^^^^^^^^^^^^^^^^ + +To generate a PCH file using Clang, one invokes Clang with the +:option:`-x <language>-header` option. This mirrors the interface in GCC +for generating PCH files: + +.. code-block:: console + + $ gcc -x c-header test.h -o test.h.gch + $ clang -x c-header test.h -o test.h.pch + +Using a PCH File +^^^^^^^^^^^^^^^^ + +A PCH file can then be used as a prefix header when a :option:`-include` +option is passed to ``clang``: + +.. code-block:: console + + $ clang -include test.h test.c -o test + +The ``clang`` driver will first check if a PCH file for ``test.h`` is +available; if so, the contents of ``test.h`` (and the files it includes) +will be processed from the PCH file. Otherwise, Clang falls back to +directly processing the content of ``test.h``. This mirrors the behavior +of GCC. + +.. note:: + + Clang does *not* automatically use PCH files for headers that are directly + included within a source file. For example: + + .. 
code-block:: console + + $ clang -x c-header test.h -o test.h.pch + $ cat test.c + #include "test.h" + $ clang test.c -o test + + In this example, ``clang`` will not automatically use the PCH file for + ``test.h`` since ``test.h`` was included directly in the source file and not + specified on the command line using :option:`-include`. + +Relocatable PCH Files +^^^^^^^^^^^^^^^^^^^^^ + +It is sometimes necessary to build a precompiled header from headers +that are not yet in their final, installed locations. For example, one +might build a precompiled header within the build tree that is then +meant to be installed alongside the headers. Clang permits the creation +of "relocatable" precompiled headers, which are built with a given path +(into the build directory) and can later be used from an installed +location. + +To build a relocatable precompiled header, place your headers into a +subdirectory whose structure mimics the installed location. For example, +if you want to build a precompiled header for the header ``mylib.h`` +that will be installed into ``/usr/include``, create a subdirectory +``build/usr/include`` and place the header ``mylib.h`` into that +subdirectory. If ``mylib.h`` depends on other headers, then they can be +stored within ``build/usr/include`` in a way that mimics the installed +location. + +Building a relocatable precompiled header requires two additional +arguments. First, pass the ``--relocatable-pch`` flag to indicate that +the resulting PCH file should be relocatable. Second, pass +:option:`-isysroot /path/to/build`, which makes all includes for your library +relative to the build directory. For example: + +.. code-block:: console + + # clang -x c-header --relocatable-pch -isysroot /path/to/build /path/to/build/mylib.h mylib.h.pch + +When loading the relocatable PCH file, the various headers used in the +PCH file are found from the system header root. For example, ``mylib.h`` +can be found in ``/usr/include/mylib.h``. 
If the headers are installed +in some other system root, the :option:`-isysroot` option can be used to provide +a different system root from which the headers will be based. For +example, :option:`-isysroot /Developer/SDKs/MacOSX10.4u.sdk` will look for +``mylib.h`` in ``/Developer/SDKs/MacOSX10.4u.sdk/usr/include/mylib.h``. + +Relocatable precompiled headers are intended to be used in a limited +number of cases where the compilation environment is tightly controlled +and the precompiled header cannot be generated after headers have been +installed. + +Controlling Code Generation +--------------------------- + +Clang provides a number of ways to control code generation. The options +are listed below. + +**-fsanitize=check1,check2,...** + Turn on runtime checks for various forms of undefined or suspicious + behavior. + + This option controls whether Clang adds runtime checks for various + forms of undefined or suspicious behavior, and is disabled by + default. If a check fails, a diagnostic message is produced at + runtime explaining the problem. The main checks are: + + - .. _opt_fsanitize_address: + + ``-fsanitize=address``: + :doc:`AddressSanitizer`, a memory error + detector. + - ``-fsanitize=init-order``: Make AddressSanitizer check for + dynamic initialization order problems. Implied by ``-fsanitize=address``. + - ``-fsanitize=address-full``: AddressSanitizer with all the + experimental features listed below. + - ``-fsanitize=integer``: Enables checks for undefined or + suspicious integer behavior. + - .. _opt_fsanitize_thread: + + ``-fsanitize=thread``: :doc:`ThreadSanitizer`, a data race detector. + - .. _opt_fsanitize_memory: + + ``-fsanitize=memory``: :doc:`MemorySanitizer`, + an *experimental* detector of uninitialized reads. Not ready for + widespread use. + - .. _opt_fsanitize_undefined: + + ``-fsanitize=undefined``: Fast and compatible undefined behavior + checker. 
Enables the undefined behavior checks that have small + runtime cost and no impact on address space layout or ABI. This + includes all of the checks listed below other than + ``unsigned-integer-overflow``. + + ``-fsanitize=undefined-trap``: This includes all sanitizers + included by ``-fsanitize=undefined``, except those that require + runtime support. This group of sanitizers are generally used + in conjunction with the ``-fsanitize-undefined-trap-on-error`` + flag, which causes traps to be emitted, rather than calls to + runtime libraries. This includes all of the checks listed below + other than ``unsigned-integer-overflow`` and ``vptr``. + + The following more fine-grained checks are also available: + + - ``-fsanitize=alignment``: Use of a misaligned pointer or creation + of a misaligned reference. + - ``-fsanitize=bool``: Load of a ``bool`` value which is neither + ``true`` nor ``false``. + - ``-fsanitize=bounds``: Out of bounds array indexing, in cases + where the array bound can be statically determined. + - ``-fsanitize=enum``: Load of a value of an enumerated type which + is not in the range of representable values for that enumerated + type. + - ``-fsanitize=float-cast-overflow``: Conversion to, from, or + between floating-point types which would overflow the + destination. + - ``-fsanitize=float-divide-by-zero``: Floating point division by + zero. + - ``-fsanitize=integer-divide-by-zero``: Integer division by zero. + - ``-fsanitize=null``: Use of a null pointer or creation of a null + reference. + - ``-fsanitize=object-size``: An attempt to use bytes which the + optimizer can determine are not part of the object being + accessed. The sizes of objects are determined using + ``__builtin_object_size``, and consequently may be able to detect + more problems at higher optimization levels. + - ``-fsanitize=return``: In C++, reaching the end of a + value-returning function without returning a value. 
+ - ``-fsanitize=shift``: Shift operators where the amount shifted is + greater or equal to the promoted bit-width of the left hand side + or less than zero, or where the left hand side is negative. For a + signed left shift, also checks for signed overflow in C, and for + unsigned overflow in C++. + - ``-fsanitize=signed-integer-overflow``: Signed integer overflow, + including all the checks added by ``-ftrapv``, and checking for + overflow in signed division (``INT_MIN / -1``). + - ``-fsanitize=unreachable``: If control flow reaches + ``__builtin_unreachable``. + - ``-fsanitize=unsigned-integer-overflow``: Unsigned integer + overflows. + - ``-fsanitize=vla-bound``: A variable-length array whose bound + does not evaluate to a positive value. + - ``-fsanitize=vptr``: Use of an object whose vptr indicates that + it is of the wrong dynamic type, or that its lifetime has not + begun or has ended. Incompatible with ``-fno-rtti``. + + Experimental features of AddressSanitizer (not ready for widespread + use, require explicit ``-fsanitize=address``): + + - ``-fsanitize=use-after-return``: Check for use-after-return + errors (accessing local variable after the function exit). + - ``-fsanitize=use-after-scope``: Check for use-after-scope errors + (accessing local variable after it went out of scope). + + Extra features of MemorySanitizer (require explicit + ``-fsanitize=memory``): + + - ``-fsanitize-memory-track-origins``: Enables origin tracking in + MemorySanitizer. Adds a second section to MemorySanitizer + reports pointing to the heap or stack allocation the + uninitialized bits came from. Slows down execution by additional + 1.5x-2x. + + The ``-fsanitize=`` argument must also be provided when linking, in + order to link to the appropriate runtime library. It is not possible + to combine the ``-fsanitize=address`` and ``-fsanitize=thread`` + checkers in the same program. 
+**-f[no-]address-sanitizer** + Deprecated synonym for :ref:`-f[no-]sanitize=address + <opt_fsanitize_address>`. +**-f[no-]thread-sanitizer** + Deprecated synonym for :ref:`-f[no-]sanitize=thread + <opt_fsanitize_thread>`. + +.. option:: -fcatch-undefined-behavior + + Deprecated synonym for :ref:`-fsanitize=undefined + <opt_fsanitize_undefined>`. + +.. option:: -fno-assume-sane-operator-new + + Don't assume that C++'s new operator is sane. + + This option tells the compiler not to assume that C++'s global + new operator will always return a pointer that does not alias any + other pointer when the function returns. + +.. option:: -ftrap-function=[name] + + Instruct code generator to emit a function call to the specified + function name for ``__builtin_trap()``. + + LLVM code generator translates ``__builtin_trap()`` to a trap + instruction if it is supported by the target ISA. Otherwise, the + builtin is translated into a call to ``abort``. If this option is + set, then the code generator will always lower the builtin to a call + to the specified function regardless of whether the target ISA has a + trap instruction. This option is useful for environments (e.g. + deeply embedded) where a trap cannot be properly handled, or when + some custom behavior is desired. + +.. option:: -ftls-model=[model] + + Select which TLS model to use. + + Valid values are: ``global-dynamic``, ``local-dynamic``, + ``initial-exec`` and ``local-exec``. The default value is + ``global-dynamic``. The compiler may use a different model if the + selected model is not supported by the target, or if a more + efficient model can be used. The TLS model can be overridden per + variable using the ``tls_model`` attribute. + +Controlling Size of Debug Information +------------------------------------- + +Debug info kind generated by Clang can be set by one of the flags listed +below. If multiple flags are present, the last one is used. + +.. 
option:: -g0 + + Don't generate any debug info (default). + +.. option:: -gline-tables-only + + Generate line number tables only. + + This kind of debug info allows to obtain stack traces with function names, + file names and line numbers (by such tools as ``gdb`` or ``addr2line``). It + doesn't contain any other data (e.g. description of local variables or + function parameters). + +.. option:: -g + + Generate complete debug info. + +.. _c: + +C Language Features +=================== + +The support for standard C in clang is feature-complete except for the +C99 floating-point pragmas. + +Extensions supported by clang +----------------------------- + +See :doc:`LanguageExtensions`. + +Differences between various standard modes +------------------------------------------ + +clang supports the -std option, which changes what language mode clang +uses. The supported modes for C are c89, gnu89, c94, c99, gnu99 and +various aliases for those modes. If no -std option is specified, clang +defaults to gnu99 mode. + +Differences between all ``c*`` and ``gnu*`` modes: + +- ``c*`` modes define "``__STRICT_ANSI__``". +- Target-specific defines not prefixed by underscores, like "linux", + are defined in ``gnu*`` modes. +- Trigraphs default to being off in ``gnu*`` modes; they can be enabled by + the -trigraphs option. +- The parser recognizes "asm" and "typeof" as keywords in ``gnu*`` modes; + the variants "``__asm__``" and "``__typeof__``" are recognized in all + modes. +- The Apple "blocks" extension is recognized by default in ``gnu*`` modes + on some platforms; it can be enabled in any mode with the "-fblocks" + option. +- Arrays that are VLA's according to the standard, but which can be + constant folded by the frontend are treated as fixed size arrays. + This occurs for things like "int X[(1, 2)];", which is technically a + VLA. ``c*`` modes are strictly compliant and treat these as VLAs. 
+ +Differences between ``*89`` and ``*99`` modes: + +- The ``*99`` modes default to implementing "inline" as specified in C99, + while the ``*89`` modes implement the GNU version. This can be + overridden for individual functions with the ``__gnu_inline__`` + attribute. +- Digraphs are not recognized in c89 mode. +- The scope of names defined inside a "for", "if", "switch", "while", + or "do" statement is different. (example: "``if ((struct x {int + x;}*)0) {}``".) +- ``__STDC_VERSION__`` is not defined in ``*89`` modes. +- "inline" is not recognized as a keyword in c89 mode. +- "restrict" is not recognized as a keyword in ``*89`` modes. +- Commas are allowed in integer constant expressions in ``*99`` modes. +- Arrays which are not lvalues are not implicitly promoted to pointers + in ``*89`` modes. +- Some warnings are different. + +c94 mode is identical to c89 mode except that digraphs are enabled in +c94 mode (FIXME: And ``__STDC_VERSION__`` should be defined!). + +GCC extensions not implemented yet +---------------------------------- + +clang tries to be compatible with gcc as much as possible, but some gcc +extensions are not implemented yet: + +- clang does not support #pragma weak (`bug + 3679 <http://llvm.org/bugs/show_bug.cgi?id=3679>`_). Due to the uses + described in the bug, this is likely to be implemented at some point, + at least partially. +- clang does not support decimal floating point types (``_Decimal32`` and + friends) or fixed-point types (``_Fract`` and friends); nobody has + expressed interest in these features yet, so it's hard to say when + they will be implemented. +- clang does not support nested functions; this is a complex feature + which is infrequently used, so it is unlikely to be implemented + anytime soon. In C++11 it can be emulated by assigning lambda + functions to local variables, e.g: + + .. code-block:: cpp + + auto const local_function = [&](int parameter) { + // Do something + }; + ... 
+ local_function(1); + +- clang does not support global register variables; this is unlikely to + be implemented soon because it requires additional LLVM backend + support. +- clang does not support static initialization of flexible array + members. This appears to be a rarely used extension, but could be + implemented pending user demand. +- clang does not support + ``__builtin_va_arg_pack``/``__builtin_va_arg_pack_len``. This is + used rarely, but in some potentially interesting places, like the + glibc headers, so it may be implemented pending user demand. Note + that because clang pretends to be like GCC 4.2, and this extension + was introduced in 4.3, the glibc headers will not try to use this + extension with clang at the moment. +- clang does not support the gcc extension for forward-declaring + function parameters; this has not shown up in any real-world code + yet, though, so it might never be implemented. + +This is not a complete list; if you find an unsupported extension +missing from this list, please send an e-mail to cfe-dev. This list +currently excludes C++; see :ref:`C++ Language Features <cxx>`. Also, this +list does not include bugs in mostly-implemented features; please see +the `bug +tracker <http://llvm.org/bugs/buglist.cgi?quicksearch=product%3Aclang+component%3A-New%2BBugs%2CAST%2CBasic%2CDriver%2CHeaders%2CLLVM%2BCodeGen%2Cparser%2Cpreprocessor%2CSemantic%2BAnalyzer>`_ +for known existing bugs (FIXME: Is there a section for bug-reporting +guidelines somewhere?). + +Intentionally unsupported GCC extensions +---------------------------------------- + +- clang does not support the gcc extension that allows variable-length + arrays in structures. This is for a few reasons: one, it is tricky to + implement, two, the extension is completely undocumented, and three, + the extension appears to be rarely used. Note that clang *does* + support flexible array members (arrays with a zero or unspecified + size at the end of a structure). 
+- clang does not have an equivalent to gcc's "fold"; this means that + clang doesn't accept some constructs gcc might accept in contexts + where a constant expression is required, like "x-x" where x is a + variable. +- clang does not support ``__builtin_apply`` and friends; this extension + is extremely obscure and difficult to implement reliably. + +.. _c_ms: + +Microsoft extensions +-------------------- + +clang has some experimental support for extensions from Microsoft Visual +C++; to enable it, use the -fms-extensions command-line option. This is +the default for Windows targets. Note that the support is incomplete; +enabling Microsoft extensions will silently drop certain constructs +(including ``__declspec`` and Microsoft-style asm statements). + +clang has a -fms-compatibility flag that makes clang accept enough +invalid C++ to be able to parse most Microsoft headers. This flag is +enabled by default for Windows targets. + +-fdelayed-template-parsing lets clang delay all template instantiation +until the end of a translation unit. This flag is enabled by default for +Windows targets. + +- clang allows setting ``_MSC_VER`` with ``-fmsc-version=``. It defaults to + 1300 which is the same as Visual C/C++ 2003. Any number is supported + and can greatly affect what Windows SDK and c++stdlib headers clang + can compile. This option will be removed when clang supports the full + set of MS extensions required for these headers. +- clang does not support the Microsoft extension where anonymous record + members can be declared using user defined typedefs. +- clang supports the Microsoft "#pragma pack" feature for controlling + record layout. GCC also contains support for this feature, however + where MSVC and GCC are incompatible clang follows the MSVC + definition. +- clang defaults to C++11 for Windows targets. + +.. 
_cxx: + +C++ Language Features +===================== + +clang fully implements all of standard C++98 except for exported +templates (which were removed in C++11), and `many C++11 +features <http://clang.llvm.org/cxx_status.html>`_ are also implemented. + +Controlling implementation limits +--------------------------------- + +.. option:: -fbracket-depth=N + + Sets the limit for nested parentheses, brackets, and braces to N. The + default is 256. + +.. option:: -fconstexpr-depth=N + + Sets the limit for recursive constexpr function invocations to N. The + default is 512. + +.. option:: -ftemplate-depth=N + + Sets the limit for recursively nested template instantiations to N. The + default is 1024. + +.. _objc: + +Objective-C Language Features +============================= + +.. _objcxx: + +Objective-C++ Language Features +=============================== + + +.. _target_features: + +Target-Specific Features and Limitations +======================================== + +CPU Architectures Features and Limitations +------------------------------------------ + +X86 +^^^ + +The support for X86 (both 32-bit and 64-bit) is considered stable on +Darwin (Mac OS/X), Linux, FreeBSD, and Dragonfly BSD: it has been tested +to correctly compile many large C, C++, Objective-C, and Objective-C++ +codebases. + +On ``x86_64-mingw32``, passing i128 (by value) is incompatible with the Microsoft +x64 calling convention. You might need to tweak +``WinX86_64ABIInfo::classify()`` in lib/CodeGen/TargetInfo.cpp. + +ARM +^^^ + +The support for ARM (specifically ARMv6 and ARMv7) is considered stable +on Darwin (iOS): it has been tested to correctly compile many large C, +C++, Objective-C, and Objective-C++ codebases. Clang only supports a +limited number of ARM architectures. It does not yet fully support +ARMv5, for example. 
+ +Other platforms +^^^^^^^^^^^^^^^ + +clang currently contains some support for PPC and Sparc; however, +significant pieces of code generation are still missing, and they +haven't undergone significant testing. + +clang contains limited support for the MSP430 embedded processor, but +both the clang support and the LLVM backend support are highly +experimental. + +Other platforms are completely unsupported at the moment. Adding the +minimal support needed for parsing and semantic analysis on a new +platform is quite easy; see ``lib/Basic/Targets.cpp`` in the clang source +tree. This level of support is also sufficient for conversion to LLVM IR +for simple programs. Proper support for conversion to LLVM IR requires +adding code to ``lib/CodeGen/CGCall.cpp`` at the moment; this is likely to +change soon, though. Generating assembly requires a suitable LLVM +backend. + +Operating System Features and Limitations +----------------------------------------- + +Darwin (Mac OS/X) +^^^^^^^^^^^^^^^^^ + +None + +Windows +^^^^^^^ + +Experimental support is available on Cygming. + +See also :ref:`Microsoft Extensions <c_ms>`. + +Cygwin +"""""" + +Clang works on Cygwin-1.7. + +MinGW32 +""""""" + +Clang works on some mingw32 distributions. Clang assumes directories as +below; + +- ``C:/mingw/include`` +- ``C:/mingw/lib`` +- ``C:/mingw/lib/gcc/mingw32/4.[3-5].0/include/c++`` + +On MSYS, a few tests might fail. 
+ +MinGW-w64 +""""""""" + +For 32-bit (i686-w64-mingw32), and 64-bit (x86\_64-w64-mingw32), Clang +assumes as below; + +- ``GCC versions 4.5.0 to 4.5.3, 4.6.0 to 4.6.2, or 4.7.0 (for the C++ header search path)`` +- ``some_directory/bin/gcc.exe`` +- ``some_directory/bin/clang.exe`` +- ``some_directory/bin/clang++.exe`` +- ``some_directory/bin/../include/c++/GCC_version`` +- ``some_directory/bin/../include/c++/GCC_version/x86_64-w64-mingw32`` +- ``some_directory/bin/../include/c++/GCC_version/i686-w64-mingw32`` +- ``some_directory/bin/../include/c++/GCC_version/backward`` +- ``some_directory/bin/../x86_64-w64-mingw32/include`` +- ``some_directory/bin/../i686-w64-mingw32/include`` +- ``some_directory/bin/../include`` + +This directory layout is standard for any toolchain you will find on the +official `MinGW-w64 website <http://mingw-w64.sourceforge.net>`_. + +Clang expects the GCC executable "gcc.exe" compiled for +``i686-w64-mingw32`` (or ``x86_64-w64-mingw32``) to be present on PATH. + +`Some tests might fail <http://llvm.org/bugs/show_bug.cgi?id=9072>`_ on +``x86_64-w64-mingw32``. diff --git a/docs/analyzer/DebugChecks.rst b/docs/analyzer/DebugChecks.rst new file mode 100644 index 0000000..f8e6f82 --- /dev/null +++ b/docs/analyzer/DebugChecks.rst @@ -0,0 +1,134 @@ +============ +Debug Checks +============ + +.. contents:: + :local: + +The analyzer contains a number of checkers which can aid in debugging. Enable +them by using the "-analyzer-checker=" flag, followed by the name of the +checker. + + +General Analysis Dumpers +======================== + +These checkers are used to dump the results of various infrastructural analyses +to stderr. Some checkers also have "view" variants, which will display a graph +using a 'dot' format viewer (such as Graphviz on OS X) instead. + +- debug.DumpCallGraph, debug.ViewCallGraph: Show the call graph generated for + the current translation unit. 
This is used to determine the order in which to + analyze functions when inlining is enabled. + +- debug.DumpCFG, debug.ViewCFG: Show the CFG generated for each top-level + function being analyzed. + +- debug.DumpDominators: Shows the dominance tree for the CFG of each top-level + function. + +- debug.DumpLiveVars: Show the results of live variable analysis for each + top-level function being analyzed. + + +Path Tracking +============= + +These checkers print information about the path taken by the analyzer engine. + +- debug.DumpCalls: Prints out every function or method call encountered during a + path traversal. This is indented to show the call stack, but does NOT do any + special handling of branches, meaning different paths could end up + interleaved. + +- debug.DumpTraversal: Prints the name of each branch statement encountered + during a path traversal ("IfStmt", "WhileStmt", etc). Currently used to check + whether the analysis engine is doing BFS or DFS. + + +State Checking +============== + +These checkers will print out information about the analyzer state in the form +of analysis warnings. They are intended for use with the -verify functionality +in regression tests. + +- debug.TaintTest: Prints out the word "tainted" for every expression that + carries taint. At the time of this writing, taint was only introduced by the + checks under experimental.security.taint.TaintPropagation; this checker may + eventually move to the security.taint package. + +- debug.ExprInspection: Responds to certain function calls, which are modeled + after builtins. These function calls should not affect the program state other + than the evaluation of their arguments; to use them, you will need to declare + them within your test file. The available functions are described below. + +(FIXME: debug.ExprInspection should probably be renamed, since it no longer only +inspects expressions.) 
+ + +ExprInspection checks +--------------------- + +- void clang_analyzer_eval(bool); + + Prints TRUE if the argument is known to have a non-zero value, FALSE if the + argument is known to have a zero or null value, and UNKNOWN if the argument + isn't sufficiently constrained on this path. You can use this to test other + values by using expressions like "x == 5". Note that this functionality is + currently DISABLED in inlined functions, since different calls to the same + inlined function could provide different information, making it difficult to + write proper -verify directives. + + In C, the argument can be typed as 'int' or as '_Bool'. + + Example usage:: + + clang_analyzer_eval(x); // expected-warning{{UNKNOWN}} + if (!x) return; + clang_analyzer_eval(x); // expected-warning{{TRUE}} + + +- void clang_analyzer_checkInlined(bool); + + If a call occurs within an inlined function, prints TRUE or FALSE according to + the value of its argument. If a call occurs outside an inlined function, + nothing is printed. + + The intended use of this checker is to assert that a function is inlined at + least once (by passing 'true' and expecting a warning), or to assert that a + function is never inlined (by passing 'false' and expecting no warning). The + argument is technically unnecessary but is intended to clarify intent. + + You might wonder why we can't print TRUE if a function is ever inlined and + FALSE if it is not. The problem is that any inlined function could conceivably + also be analyzed as a top-level function (in which case both TRUE and FALSE + would be printed), depending on the value of the -analyzer-inlining option. + + In C, the argument can be typed as 'int' or as '_Bool'. 
+ + Example usage:: + + int inlined() { + clang_analyzer_checkInlined(true); // expected-warning{{TRUE}} + return 42; + } + + void topLevel() { + clang_analyzer_checkInlined(false); // no-warning (not inlined) + int value = inlined(); + // This assertion will not be valid if the previous call was not inlined. + clang_analyzer_eval(value == 42); // expected-warning{{TRUE}} + } + + +Statistics +========== + +The debug.Stats checker collects various information about the analysis of each +function, such as how many blocks were reached and if the analyzer timed out. + +There is also an additional -analyzer-stats flag, which enables various +statistics within the analyzer engine. Note the Stats checker (which produces at +least one bug report per function) may actually change the values reported by +-analyzer-stats. diff --git a/docs/analyzer/IPA.txt b/docs/analyzer/IPA.txt index 016cea9..01e73ce 100644 --- a/docs/analyzer/IPA.txt +++ b/docs/analyzer/IPA.txt @@ -2,36 +2,37 @@ Inlining ======== There are several options that control which calls the analyzer will consider for -inlining. The major one is -analyzer-ipa: +inlining. The major one is -analyzer-config ipa: - -analyzer-ipa=none - All inlining is disabled. This is the only mode available - in LLVM 3.1 and earlier and in Xcode 4.3 and earlier. + -analyzer-config ipa=none - All inlining is disabled. This is the only mode + available in LLVM 3.1 and earlier and in Xcode 4.3 and earlier. - -analyzer-ipa=basic-inlining - Turns on inlining for C functions, C++ static - member functions, and blocks -- essentially, the calls that behave like - simple C function calls. This is essentially the mode used in Xcode 4.4. + -analyzer-config ipa=basic-inlining - Turns on inlining for C functions, C++ + static member functions, and blocks -- essentially, the calls that behave + like simple C function calls. This is essentially the mode used in + Xcode 4.4. 
- -analyzer-ipa=inlining - Turns on inlining when we can confidently find the - function/method body corresponding to the call. (C functions, static + -analyzer-config ipa=inlining - Turns on inlining when we can confidently find + the function/method body corresponding to the call. (C functions, static functions, devirtualized C++ methods, Objective-C class methods, Objective-C instance methods when ExprEngine is confident about the dynamic type of the instance). - -analyzer-ipa=dynamic - Inline instance methods for which the type is + -analyzer-config ipa=dynamic - Inline instance methods for which the type is determined at runtime and we are not 100% sure that our type info is correct. For virtual calls, inline the most plausible definition. - -analyzer-ipa=dynamic-bifurcate - Same as -analyzer-ipa=dynamic, but the path - is split. We inline on one branch and do not inline on the other. This mode - does not drop the coverage in cases when the parent class has code that is - only exercised when some of its methods are overridden. + -analyzer-config ipa=dynamic-bifurcate - Same as -analyzer-config ipa=dynamic, + but the path is split. We inline on one branch and do not inline on the + other. This mode does not drop the coverage in cases when the parent class + has code that is only exercised when some of its methods are overridden. -Currently, -analyzer-ipa=dynamic-bifurcate is the default mode. +Currently, -analyzer-config ipa=dynamic-bifurcate is the default mode. -While -analyzer-ipa determines in general how aggressively the analyzer will try to -inline functions, several additional options control which types of functions can -inlined, in an all-or-nothing way. These options use the analyzer's configuration -table, so they are all specified as follows: +While -analyzer-config ipa determines in general how aggressively the analyzer +will try to inline functions, several additional options control which types of +functions can be inlined, in an all-or-nothing way. 
These options use the +analyzer's configuration table, so they are all specified as follows: -analyzer-config OPTION=VALUE @@ -45,10 +46,14 @@ Each of these modes implies that all the previous member function kinds will be inlined as well; it doesn't make sense to inline destructors without inlining constructors, for example. -The default c++-inlining mode is 'methods', meaning only regular member -functions and overloaded operators will be inlined. Note that no C++ member -functions will be inlined under -analyzer-ipa=none or --analyzer-ipa=basic-inlining. +The default c++-inlining mode is 'destructors', meaning that all member +functions with visible definitions will be considered for inlining. In some +cases the analyzer may still choose not to inline the function. + +Note that under 'constructors', constructors for types with non-trivial +destructors will not be inlined. Additionally, no C++ member functions will be +inlined under -analyzer-config ipa=none or -analyzer-config ipa=basic-inlining, +regardless of the setting of the c++-inlining mode. ### c++-template-inlining ### @@ -71,7 +76,8 @@ considered for inlining. -analyzer-config c++-template-inlining=[true | false] -Currently, C++ standard library functions are NOT considered for inlining by default. +Currently, C++ standard library functions are considered for inlining by +default. The standard library functions and the STL in particular are used ubiquitously enough that our tolerance for false positives is even lower here. A false @@ -79,6 +85,31 @@ positive due to poor modeling of the STL leads to a poor user experience, since most users would not be comfortable adding assertions to system headers in order to silence analyzer warnings. +### c++-container-inlining ### + +This option controls whether constructors and destructors of "container" types +should be considered for inlining. 
+ + -analyzer-config c++-container-inlining=[true | false] + +Currently, these constructors and destructors are NOT considered for inlining +by default. + +The current implementation of this setting checks whether a type has a member +named 'iterator' or a member named 'begin'; these names are idiomatic in C++, +with the latter specified in the C++11 standard. The analyzer currently does a +fairly poor job of modeling certain data structure invariants of container-like +objects. For example, these three expressions should be equivalent: + + std::distance(c.begin(), c.end()) == 0 + c.begin() == c.end() + c.empty() + +Many of these issues are avoided if containers always have unknown, symbolic +state, which is what happens when their constructors are treated as opaque. +In the future, we may decide specific containers are "safe" to model through +inlining, or choose to model them directly using checkers instead. + Basics of Implementation ----------------------- @@ -229,31 +260,31 @@ inlined. == Inlining Dynamic Calls == -The -analyzer-ipa option has five different modes: none, basic-inlining, -inlining, dynamic, and dynamic-bifurcate. Under -analyzer-ipa=dynamic, all -dynamic calls are inlined, whether we are certain or not that this will actually -be the definition used at runtime. Under -analyzer-ipa=inlining, only -"near-perfect" devirtualized calls are inlined*, and other dynamic calls are -evaluated conservatively (as if no definition were available). +The -analyzer-config ipa option has five different modes: none, basic-inlining, +inlining, dynamic, and dynamic-bifurcate. Under -analyzer-config ipa=dynamic, +all dynamic calls are inlined, whether we are certain or not that this will +actually be the definition used at runtime. Under -analyzer-config ipa=inlining, +only "near-perfect" devirtualized calls are inlined*, and other dynamic calls +are evaluated conservatively (as if no definition were available). 
* Currently, no Objective-C messages are not inlined under - -analyzer-ipa=inlining, even if we are reasonably confident of the type of the - receiver. We plan to enable this once we have tested our heuristics more - thoroughly. + -analyzer-config ipa=inlining, even if we are reasonably confident of the type + of the receiver. We plan to enable this once we have tested our heuristics + more thoroughly. -The last option, -analyzer-ipa=dynamic-bifurcate, behaves similarly to +The last option, -analyzer-config ipa=dynamic-bifurcate, behaves similarly to "dynamic", but performs a conservative invalidation in the general virtual case in *addition* to inlining. The details of this are discussed below. -As stated above, -analyzer-ipa=basic-inlining does not inline any C++ member -functions or Objective-C method calls, even if they are non-virtual or can be -safely devirtualized. +As stated above, -analyzer-config ipa=basic-inlining does not inline any C++ +member functions or Objective-C method calls, even if they are non-virtual or +can be safely devirtualized. Bifurcation ----------- -ExprEngine::BifurcateCall implements the -analyzer-ipa=dynamic-bifurcate +ExprEngine::BifurcateCall implements the -analyzer-config ipa=dynamic-bifurcate mode. When a call is made on an object with imprecise dynamic type information diff --git a/docs/analyzer/Makefile b/docs/analyzer/Makefile new file mode 100644 index 0000000..14f5e60 --- /dev/null +++ b/docs/analyzer/Makefile @@ -0,0 +1,155 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
+ +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext + +default: html + +help: + @echo "Please use \`make <target>' where <target> is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + -rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." 
+ +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/ClangStaticAnalyzer.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/ClangStaticAnalyzer.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/ClangStaticAnalyzer" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/ClangStaticAnalyzer" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. 
The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." diff --git a/docs/analyzer/RegionStore.txt b/docs/analyzer/RegionStore.txt new file mode 100644 index 0000000..5d37cf7 --- /dev/null +++ b/docs/analyzer/RegionStore.txt @@ -0,0 +1,171 @@ +The analyzer "Store" represents the contents of memory regions. It is an opaque +functional data structure stored in each ProgramState; the only class that can +modify the store is its associated StoreManager. + +Currently (Feb. 2013), the only StoreManager implementation being used is +RegionStoreManager. This store records bindings to memory regions using a "base +region + offset" key. (This allows `*p` and `p[0]` to map to the same location, +among other benefits.) 
+ +Regions are grouped into "clusters", which roughly correspond to "regions with +the same base region". This allows certain operations to be more efficient, +such as invalidation. + +Regions that do not have a known offset use a special "symbolic" offset. These +keys store both the original region, and the "concrete offset region" -- the +last region whose offset is entirely concrete. (For example, in the expression +`foo.bar[1][i].baz`, the concrete offset region is the array `foo.bar[1]`, +since that has a known offset from the start of the top-level `foo` struct.) + + +Binding Invalidation +==================== + +Supporting both concrete and symbolic offsets makes things a bit tricky. Here's +an example: + + foo[0] = 0; + foo[1] = 1; + foo[i] = i; + +After the third assignment, nothing can be said about the value of `foo[0]`, +because `foo[i]` may have overwritten it! Thus, *binding to a region with a +symbolic offset invalidates the entire concrete offset region.* We know +`foo[i]` is somewhere within `foo`, so we don't have to invalidate anything +else, but we do have to be conservative about all other bindings within `foo`. + +Continuing the example: + + foo[i] = i; + foo[0] = 0; + +After this latest assignment, nothing can be said about the value of `foo[i]`, +because `foo[0]` may have overwritten it! *Binding to a region R with a +concrete offset invalidates any symbolic offset bindings whose concrete offset +region is a super-region **or** sub-region of R.* All we know about `foo[i]` is +that it is somewhere within `foo`, so changing *anything* within `foo` might +change `foo[i]`, and changing *all* of `foo` (or its base region) will +*definitely* change `foo[i]`. + +This logic could be improved by using the current constraints on `i`, at the +cost of speed. The latter case could also be improved by matching region kinds, +i.e. changing `foo[0].a` is unlikely to affect `foo[i].b`, no matter what `i` +is. 
+ +For more detail, read through RegionStoreManager::removeSubRegionBindings in +RegionStore.cpp. + + +ObjCIvarRegions +=============== + +Objective-C instance variables require a bit of special handling. Like struct +fields, they are not base regions, and when their parent object region is +invalidated, all the instance variables must be invalidated as well. However, +they have no concrete compile-time offsets (in the modern, "non-fragile" +runtime), and so cannot easily be represented as an offset from the start of +the object in the analyzer. Moreover, this means that invalidating a single +instance variable should *not* invalidate the rest of the object, since unlike +struct fields or array elements there is no way to perform pointer arithmetic +to access another instance variable. + +Consequently, although the base region of an ObjCIvarRegion is the entire +object, RegionStore offsets are computed from the start of the instance +variable. Thus it is not valid to assume that all bindings with non-symbolic +offsets start from the base region! + + +Region Invalidation +=================== + +Unlike binding invalidation, region invalidation occurs when the entire +contents of a region may have changed---say, because it has been passed to a +function the analyzer can model, like memcpy, or because its address has +escaped, usually as an argument to an opaque function call. In these cases we +need to throw away not just all bindings within the region itself, but within +its entire cluster, since neighboring regions may be accessed via pointer +arithmetic. + +Region invalidation typically does even more than this, however. Because it +usually represents the complete escape of a region from the analyzer's model, +its *contents* must also be transitively invalidated. (For example, if a region +'p' of type 'int **' is invalidated, the contents of '*p' and '**p' may have +changed as well.) 
The algorithm that traverses this transitive closure of +accessible regions is known as ClusterAnalysis, and is also used for finding +all live bindings in the store (in order to throw away the dead ones). The name +"ClusterAnalysis" predates the cluster-based organization of bindings, but +refers to the same concept: during invalidation and liveness analysis, all +bindings within a cluster must be treated in the same way for a conservative +model of program behavior. + + +Default Bindings +================ + +Most bindings in RegionStore are simple scalar values -- integers and pointers. +These are known as "Direct" bindings. However, RegionStore supports a second +type of binding called a "Default" binding. These are used to provide values to +all the elements of an aggregate type (struct or array) without having to +explicitly specify a binding for each individual element. + +When there is no Direct binding for a particular region, the store manager +looks at each super-region in turn to see if there is a Default binding. If so, +this value is used as the value of the original region. The search ends when +the base region is reached, at which point the RegionStore will pick an +appropriate default value for the region (usually a symbolic value, but +sometimes zero, for static data, or "uninitialized", for stack variables). + + int manyInts[10]; + manyInts[1] = 42; // Creates a Direct binding for manyInts[1]. + print(manyInts[1]); // Retrieves the Direct binding for manyInts[1]; + print(manyInts[0]); // There is no Direct binding for manyInts[0]. + // Is there a Default binding for the entire array? + // There is not, but it is a stack variable, so we use + // "uninitialized" as the default value (and emit a + // diagnostic!). + +NOTE: The fact that bindings are stored as a base region plus an offset limits +the Default Binding strategy, because in C aggregates can contain other +aggregates. 
In the current implementation of RegionStore, there is no way to +distinguish a Default binding for an entire aggregate from a Default binding +for the sub-aggregate at offset 0. + + +Lazy Bindings (LazyCompoundVal) +=============================== + +RegionStore implements an optimization for copying aggregates (structs and +arrays) called "lazy bindings", implemented using a special SVal called +LazyCompoundVal. When the store is asked for the "binding" for an entire +aggregate (i.e. for an lvalue-to-rvalue conversion), it returns a +LazyCompoundVal instead. When this value is then stored into a variable, it is +bound as a Default value. This makes copying arrays and structs much cheaper +than if they had required memberwise access. + +Under the hood, a LazyCompoundVal is implemented as a uniqued pair of (region, +store), representing "the value of the region during this 'snapshot' of the +store". This has important implications for any sort of liveness or +reachability analysis, which must take the bindings in the old store into +account. + +Retrieving a value from a lazy binding happens in the same way as any other +Default binding: since there is no direct binding, the store manager falls back +to super-regions to look for an appropriate default binding. LazyCompoundVal +differs from a normal default binding, however, in that it contains several +different values, instead of one value that will appear several times. Because +of this, the store manager has to reconstruct the subregion chain on top of the +LazyCompoundVal region, and look up *that* region in the previous store. + +Here's a concrete example: + + CGPoint p; + p.x = 42; // A Direct binding is made to the FieldRegion 'p.x'. + CGPoint p2 = p; // A LazyCompoundVal is created for 'p', along with a + // snapshot of the current store state. This value is then + // used as a Default binding for the VarRegion 'p2'. + return p2.x; // The binding for FieldRegion 'p2.x' is requested. 
+ // There is no Direct binding, so we look for a Default + // binding to 'p2' and find the LCV. + // Because it's an LCV, we look at our requested region + // and see that it's the '.x' field. We ask for the value + // of 'p.x' within the snapshot, and get back 42. diff --git a/docs/analyzer/conf.py b/docs/analyzer/conf.py new file mode 100644 index 0000000..dff9610 --- /dev/null +++ b/docs/analyzer/conf.py @@ -0,0 +1,246 @@ +# -*- coding: utf-8 -*- +# +# Clang Static Analyzer documentation build configuration file, created by +# sphinx-quickstart on Wed Jan 2 15:54:28 2013. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.todo', 'sphinx.ext.mathjax'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. 
+project = u'Clang Static Analyzer' +copyright = u'2013, Analyzer Team' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '3.3' +# The full version, including alpha/beta/rc tags. +release = '3.3' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'haiku' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. 
+#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
+#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'ClangStaticAnalyzerdoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('index', 'ClangStaticAnalyzer.tex', u'Clang Static Analyzer Documentation', + u'Analyzer Team', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'clangstaticanalyzer', u'Clang Static Analyzer Documentation', + [u'Analyzer Team'], 1) +] + +# If true, show URL addresses after external links. 
+#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------------ + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'ClangStaticAnalyzer', u'Clang Static Analyzer Documentation', + u'Analyzer Team', 'ClangStaticAnalyzer', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = {'http://docs.python.org/': None} diff --git a/docs/analyzer/debug-checks.txt b/docs/analyzer/debug-checks.txt deleted file mode 100644 index 6ac451f..0000000 --- a/docs/analyzer/debug-checks.txt +++ /dev/null @@ -1,89 +0,0 @@ -The analyzer contains a number of checkers which can aid in debugging. Enable them by using the "-analyzer-checker=" flag, followed by the name of the checker. - -General Analysis Dumpers -======================== -These checkers are used to dump the results of various infrastructural analyses to stderr. Some checkers also have "view" variants, which will display a graph using a 'dot' format viewer (such as Graphviz on OS X) instead. - -- debug.DumpCallGraph, debug.ViewCallGraph: Show the call graph generated for the current translation unit. This is used to determine the order in which to analyze functions when inlining is enabled. -- debug.DumpCFG, debug.ViewCFG: Show the CFG generated for each top-level function being analyzed. -- debug.DumpDominators: Shows the dominance tree for the CFG of each top-level function. 
-- debug.DumpLiveVars: Show the results of live variable analysis for each top-level function being analyzed. - - -Path Tracking -============= -These checkers print information about the path taken by the analyzer engine. - -- debug.DumpCalls: Prints out every function or method call encountered during a path traversal. This is indented to show the call stack, but does NOT do any special handling of branches, meaning different paths could end up interleaved. -- debug.DumpTraversal: Prints the name of each branch statement encountered during a path traversal ("IfStmt", "WhileStmt", etc). Currently used to check whether the analysis engine is doing BFS or DFS. - - -State Checking -============== -These checkers will print out information about the analyzer state in the form of analysis warnings. They are intended for use with the -verify functionality in regression tests. - -- debug.TaintTest: Prints out the word "tainted" for every expression that carries taint. At the time of this writing, taint was only introduced by the checks under experimental.security.taint.TaintPropagation; this checker may eventually move to the security.taint package. -- debug.ExprInspection: Responds to certain function calls, which are modeled after builtins. These function calls should affect the program state other than the evaluation of their arguments; to use them, you will need to declare them within your test file. The available functions are described below. - -(FIXME: debug.ExprInspection should probably be renamed, since it no longer only inspects expressions.) - - -ExprInspection checks ---------------------- - -- void clang_analyzer_eval(bool); - -Prints TRUE if the argument is known to have a non-zero value, - FALSE if the argument is known to have a zero or null value, and - UNKNOWN if the argument isn't sufficiently constrained on this path. -You can use this to test other values by using expressions like "x == 5". 
-Note that this functionality is currently DISABLED in inlined functions, -since different calls to the same inlined function could provide different -information, making it difficult to write proper -verify directives. - -In C, the argument can be typed as 'int' or as '_Bool'. - -Example usage: - clang_analyzer_eval(x); // expected-warning{{UNKNOWN}} - if (!x) return; - clang_analyzer_eval(x); // expected-warning{{TRUE}} - - -- void clang_analyzer_checkInlined(bool); - -If a call occurs within an inlined function, prints TRUE or FALSE according to -the value of its argument. If a call occurs outside an inlined function, -nothing is printed. - -The intended use of this checker is to assert that a function is inlined at -least once (by passing 'true' and expecting a warning), or to assert that a -function is never inlined (by passing 'false' and expecting no warning). The -argument is technically unnecessary but is intended to clarify intent. - -You might wonder why we can't print TRUE if a function is ever inlined and -FALSE if it is not. The problem is that any inlined function could conceivably -also be analyzed as a top-level function (in which case both TRUE and FALSE -would be printed), depending on the value of the -analyzer-inlining option. - -In C, the argument can be typed as 'int' or as '_Bool'. - -Example usage: - int inlined() { - clang_analyzer_checkInlined(true); // expected-warning{{TRUE}} - return 42; - } - - void topLevel() { - clang_analyzer_checkInlined(false); // no-warning (not inlined) - int value = inlined(); - // This assertion will not be valid if the previous call was not inlined. - clang_analyzer_eval(value == 42); // expected-warning{{TRUE}} - } - - - -Statistics -========== -The debug.Stats checker collects various information about the analysis of each function, such as how many blocks were reached and if the analyzer timed out. 
- -There is also an additional -analyzer-stats flag, which enables various statistics within the analyzer engine. Note the Stats checker (which produces at least one bug report per function) may actually change the values reported by -analyzer-stats. diff --git a/docs/analyzer/index.rst b/docs/analyzer/index.rst new file mode 100644 index 0000000..767567f --- /dev/null +++ b/docs/analyzer/index.rst @@ -0,0 +1,23 @@ +.. Clang Static Analyzer documentation master file, created by + sphinx-quickstart on Wed Jan 2 15:54:28 2013. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to Clang Static Analyzer's documentation! +================================================= + +Contents: + +.. toctree:: + :maxdepth: 2 + + DebugChecks + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/docs/analyzer/make.bat b/docs/analyzer/make.bat new file mode 100644 index 0000000..6c2c63d --- /dev/null +++ b/docs/analyzer/make.bat @@ -0,0 +1,190 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set BUILDDIR=_build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . +set I18NSPHINXOPTS=%SPHINXOPTS% . +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% + set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^<target^>` where ^<target^> is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. 
devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. man to make manual pages + echo. texinfo to make Texinfo files + echo. gettext to make PO message catalogs + echo. changes to make an overview over all changed/added/deprecated items + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. 
+ goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\ClangStaticAnalyzer.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\ClangStaticAnalyzer.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. + goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. + goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. 
+ goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. + goto end +) + +:end diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..92741d2 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,242 @@ +# -*- coding: utf-8 -*- +# +# Clang documentation build configuration file, created by +# sphinx-quickstart on Sun Dec 9 20:01:55 2012. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.todo', 'sphinx.ext.mathjax'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. 
+#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'Clang' +copyright = u'2007-2013, The Clang Team' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '3.3' +# The full version, including alpha/beta/rc tags. +release = '3.3' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build', 'analyzer'] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'friendly' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'haiku' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. 
For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." 
is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'Clangdoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('index', 'Clang.tex', u'Clang Documentation', + u'The Clang Team', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'clang', u'Clang Documentation', + [u'The Clang Team'], 1) +] + +# If true, show URL addresses after external links. 
+#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------------ + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'Clang', u'Clang Documentation', + u'The Clang Team', 'Clang', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..5cdfb6b --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,73 @@ +.. Clang documentation master file, created by + sphinx-quickstart on Sun Dec 9 20:01:55 2012. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +.. title:: Welcome to Clang's documentation! + +.. toctree:: + :maxdepth: 1 + + ReleaseNotes + +Using Clang as a Compiler +========================= + +.. toctree:: + :maxdepth: 1 + + UsersManual + LanguageExtensions + AddressSanitizer + ThreadSanitizer + MemorySanitizer + Modules + FAQ + +Using Clang as a Library +======================== + +.. toctree:: + :maxdepth: 1 + + Tooling + ExternalClangExamples + IntroductionToTheClangAST + LibTooling + LibFormat + ClangPlugins + RAVFrontendAction + LibASTMatchersTutorial + LibASTMatchers + HowToSetupToolingForLLVM + JSONCompilationDatabase + +Using Clang Tools +================= + +.. toctree:: + :maxdepth: 1 + + ClangTools + ClangCheck + ClangFormat + +Design Documents +================ + +.. 
toctree:: + :maxdepth: 1 + + InternalsManual + DriverInternals + PTHInternals + PCHInternals + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..f284258 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,190 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set BUILDDIR=_build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . +set I18NSPHINXOPTS=%SPHINXOPTS% . +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% + set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^<target^>` where ^<target^> is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. man to make manual pages + echo. texinfo to make Texinfo files + echo. gettext to make PO message catalogs + echo. changes to make an overview over all changed/added/deprecated items + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. 
The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Clang.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Clang.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. + goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. 
+ echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. + goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. 
+ goto end +) + +:end diff --git a/docs/tools/dump_ast_matchers.py b/docs/tools/dump_ast_matchers.py index bc5f1a6..4ed6822 100644 --- a/docs/tools/dump_ast_matchers.py +++ b/docs/tools/dump_ast_matchers.py @@ -133,24 +133,56 @@ def act_on_decl(declaration, comment, allowed_types): if declaration.strip(): # Node matchers are defined by writing: # VariadicDynCastAllOfMatcher<ResultType, ArgumentType> name; - m = re.match(r""".*VariadicDynCastAllOfMatcher\s*< - \s*([^\s,]+)\s*, - \s*([^\s>]+)\s*> + m = re.match(r""".*Variadic(?:DynCast)?AllOfMatcher\s*< + \s*([^\s,]+)\s*(?:, + \s*([^\s>]+)\s*)?> \s*([^\s;]+)\s*;\s*$""", declaration, flags=re.X) if m: result, inner, name = m.groups() + if not inner: + inner = result add_matcher(result, name, 'Matcher<%s>...' % inner, comment, is_dyncast=True) return # Parse the various matcher definition macros. - m = re.match(r"""^\s*AST_(POLYMORPHIC_)?MATCHER(_P)?(.?)\( + m = re.match(""".*AST_TYPE_MATCHER\( + \s*([^\s,]+\s*), + \s*([^\s,]+\s*) + \)\s*;\s*$""", declaration, flags=re.X) + if m: + inner, name = m.groups() + add_matcher('Type', name, 'Matcher<%s>...' % inner, + comment, is_dyncast=True) + add_matcher('TypeLoc', '%sLoc' % name, 'Matcher<%sLoc>...' % inner, + comment, is_dyncast=True) + return + + m = re.match(""".*AST_TYPE(LOC)?_TRAVERSE_MATCHER\( + \s*([^\s,]+\s*), + \s*(?:[^\s,]+\s*) + \)\s*;\s*$""", declaration, flags=re.X) + if m: + loc = m.group(1) + name = m.group(2) + result_types = extract_result_types(comment) + if not result_types: + raise Exception('Did not find allowed result types for: %s' % name) + for result_type in result_types: + add_matcher(result_type, name, 'Matcher<Type>', comment) + if loc: + add_matcher('%sLoc' % result_type, '%sLoc' % name, 'Matcher<TypeLoc>', + comment) + return + + m = re.match(r"""^\s*AST_(POLYMORPHIC_)?MATCHER(_P)?(.?)(?:_OVERLOAD)?\( (?:\s*([^\s,]+)\s*,)? \s*([^\s,]+)\s* (?:,\s*([^\s,]+)\s* ,\s*([^\s,]+)\s*)? (?:,\s*([^\s,]+)\s* ,\s*([^\s,]+)\s*)? + (?:,\s*\d+\s*)? 
\)\s*{\s*$""", declaration, flags=re.X) if m: p, n, result, name = m.groups()[1:5] @@ -178,9 +210,9 @@ def act_on_decl(declaration, comment, allowed_types): if m: result, name, args = m.groups() args = ', '.join(p.strip() for p in args.split(',')) - m = re.match(r'.*\s+internal::Matcher<([^>]+)>$', result) + m = re.match(r'.*\s+internal::(Bindable)?Matcher<([^>]+)>$', result) if m: - result_types = [m.group(1)] + result_types = [m.group(2)] else: result_types = extract_result_types(comment) if not result_types: |