summary refs log tree commit diff
path: root/share/doc/cppinternals
diff options
context:
space:
mode:
Diffstat (limited to 'share/doc/cppinternals')
-rw-r--r-- share/doc/cppinternals/Concept-Index.html 174
-rw-r--r-- share/doc/cppinternals/Conventions.html 84
-rw-r--r-- share/doc/cppinternals/Files.html 135
-rw-r--r-- share/doc/cppinternals/Guard-Macros.html 188
-rw-r--r-- share/doc/cppinternals/Hash-Nodes.html 130
-rw-r--r-- share/doc/cppinternals/Invalid-identifiers.html 50
-rw-r--r-- share/doc/cppinternals/Lexer.html 346
-rw-r--r-- share/doc/cppinternals/Lexing-a-line.html 50
-rw-r--r-- share/doc/cppinternals/Line-Numbering.html 158
-rw-r--r-- share/doc/cppinternals/Macro-Expansion.html 241
-rw-r--r-- share/doc/cppinternals/Token-Spacing.html 203
-rw-r--r-- share/doc/cppinternals/index.html 160
12 files changed, 1919 insertions, 0 deletions
diff --git a/share/doc/cppinternals/Concept-Index.html b/share/doc/cppinternals/Concept-Index.html
new file mode 100644
index 0000000..7540e88
--- /dev/null
+++ b/share/doc/cppinternals/Concept-Index.html
@@ -0,0 +1,174 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 5.1, http://www.gnu.org/software/texinfo/ -->
+<head>
+<title>The GNU C Preprocessor Internals: Concept Index</title>
+
+<meta name="description" content="The GNU C Preprocessor Internals: Concept Index">
+<meta name="keywords" content="The GNU C Preprocessor Internals: Concept Index">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<link href="index.html#Top" rel="start" title="Top">
+<link href="#Concept-Index" rel="index" title="Concept Index">
+<link href="index.html#SEC_Contents" rel="contents" title="Table of Contents">
+<link href="index.html#Top" rel="up" title="Top">
+<link href="Files.html#Files" rel="previous" title="Files">
+<style type="text/css">
+<!--
+a.summary-letter {text-decoration: none}
+blockquote.smallquotation {font-size: smaller}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+div.indentedblock {margin-left: 3.2em}
+div.lisp {margin-left: 3.2em}
+div.smalldisplay {margin-left: 3.2em}
+div.smallexample {margin-left: 3.2em}
+div.smallindentedblock {margin-left: 3.2em; font-size: smaller}
+div.smalllisp {margin-left: 3.2em}
+kbd {font-style:oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+pre.smalldisplay {font-family: inherit; font-size: smaller}
+pre.smallexample {font-size: smaller}
+pre.smallformat {font-family: inherit; font-size: smaller}
+pre.smalllisp {font-size: smaller}
+span.nocodebreak {white-space:nowrap}
+span.nolinebreak {white-space:nowrap}
+span.roman {font-family:serif; font-weight:normal}
+span.sansserif {font-family:sans-serif; font-weight:normal}
+ul.no-bullet {list-style: none}
+-->
+</style>
+
+
+</head>
+
+<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000">
+<a name="Concept-Index"></a>
+<div class="header">
+<p>
+Previous: <a href="Files.html#Files" accesskey="p" rel="previous">Files</a>, Up: <a href="index.html#Top" accesskey="u" rel="up">Top</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="#Concept-Index" title="Index" rel="index">Index</a>]</p>
+</div>
+<hr>
+<a name="Concept-Index-1"></a>
+<h2 class="unnumbered">Concept Index</h2>
+<table><tr><th valign="top">Jump to: &nbsp; </th><td><a class="summary-letter" href="#Concept-Index_cp_letter-A"><b>A</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-C"><b>C</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-E"><b>E</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-F"><b>F</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-G"><b>G</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-H"><b>H</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-I"><b>I</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-L"><b>L</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-M"><b>M</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-N"><b>N</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-P"><b>P</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-S"><b>S</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-T"><b>T</b></a>
+ &nbsp;
+</td></tr></table>
+<table class="index-cp" border="0">
+<tr><td></td><th align="left">Index Entry</th><td>&nbsp;</td><th align="left"> Section</th></tr>
+<tr><td colspan="4"> <hr></td></tr>
+<tr><th><a name="Concept-Index_cp_letter-A">A</a></th><td></td><td></td></tr>
+<tr><td></td><td valign="top"><a href="Hash-Nodes.html#index-assertions">assertions</a>:</td><td>&nbsp;</td><td valign="top"><a href="Hash-Nodes.html#Hash-Nodes">Hash Nodes</a></td></tr>
+<tr><td colspan="4"> <hr></td></tr>
+<tr><th><a name="Concept-Index_cp_letter-C">C</a></th><td></td><td></td></tr>
+<tr><td></td><td valign="top"><a href="Guard-Macros.html#index-controlling-macros">controlling macros</a>:</td><td>&nbsp;</td><td valign="top"><a href="Guard-Macros.html#Guard-Macros">Guard Macros</a></td></tr>
+<tr><td colspan="4"> <hr></td></tr>
+<tr><th><a name="Concept-Index_cp_letter-E">E</a></th><td></td><td></td></tr>
+<tr><td></td><td valign="top"><a href="Lexer.html#index-escaped-newlines">escaped newlines</a>:</td><td>&nbsp;</td><td valign="top"><a href="Lexer.html#Lexer">Lexer</a></td></tr>
+<tr><td colspan="4"> <hr></td></tr>
+<tr><th><a name="Concept-Index_cp_letter-F">F</a></th><td></td><td></td></tr>
+<tr><td></td><td valign="top"><a href="Files.html#index-files">files</a>:</td><td>&nbsp;</td><td valign="top"><a href="Files.html#Files">Files</a></td></tr>
+<tr><td colspan="4"> <hr></td></tr>
+<tr><th><a name="Concept-Index_cp_letter-G">G</a></th><td></td><td></td></tr>
+<tr><td></td><td valign="top"><a href="Guard-Macros.html#index-guard-macros">guard macros</a>:</td><td>&nbsp;</td><td valign="top"><a href="Guard-Macros.html#Guard-Macros">Guard Macros</a></td></tr>
+<tr><td colspan="4"> <hr></td></tr>
+<tr><th><a name="Concept-Index_cp_letter-H">H</a></th><td></td><td></td></tr>
+<tr><td></td><td valign="top"><a href="Hash-Nodes.html#index-hash-table">hash table</a>:</td><td>&nbsp;</td><td valign="top"><a href="Hash-Nodes.html#Hash-Nodes">Hash Nodes</a></td></tr>
+<tr><td></td><td valign="top"><a href="Conventions.html#index-header-files">header files</a>:</td><td>&nbsp;</td><td valign="top"><a href="Conventions.html#Conventions">Conventions</a></td></tr>
+<tr><td colspan="4"> <hr></td></tr>
+<tr><th><a name="Concept-Index_cp_letter-I">I</a></th><td></td><td></td></tr>
+<tr><td></td><td valign="top"><a href="Hash-Nodes.html#index-identifiers">identifiers</a>:</td><td>&nbsp;</td><td valign="top"><a href="Hash-Nodes.html#Hash-Nodes">Hash Nodes</a></td></tr>
+<tr><td></td><td valign="top"><a href="Conventions.html#index-interface">interface</a>:</td><td>&nbsp;</td><td valign="top"><a href="Conventions.html#Conventions">Conventions</a></td></tr>
+<tr><td colspan="4"> <hr></td></tr>
+<tr><th><a name="Concept-Index_cp_letter-L">L</a></th><td></td><td></td></tr>
+<tr><td></td><td valign="top"><a href="Lexer.html#index-lexer">lexer</a>:</td><td>&nbsp;</td><td valign="top"><a href="Lexer.html#Lexer">Lexer</a></td></tr>
+<tr><td></td><td valign="top"><a href="Line-Numbering.html#index-line-numbers">line numbers</a>:</td><td>&nbsp;</td><td valign="top"><a href="Line-Numbering.html#Line-Numbering">Line Numbering</a></td></tr>
+<tr><td colspan="4"> <hr></td></tr>
+<tr><th><a name="Concept-Index_cp_letter-M">M</a></th><td></td><td></td></tr>
+<tr><td></td><td valign="top"><a href="Macro-Expansion.html#index-macro-expansion">macro expansion</a>:</td><td>&nbsp;</td><td valign="top"><a href="Macro-Expansion.html#Macro-Expansion">Macro Expansion</a></td></tr>
+<tr><td></td><td valign="top"><a href="Macro-Expansion.html#index-macro-representation-_0028internal_0029">macro representation (internal)</a>:</td><td>&nbsp;</td><td valign="top"><a href="Macro-Expansion.html#Macro-Expansion">Macro Expansion</a></td></tr>
+<tr><td></td><td valign="top"><a href="Hash-Nodes.html#index-macros">macros</a>:</td><td>&nbsp;</td><td valign="top"><a href="Hash-Nodes.html#Hash-Nodes">Hash Nodes</a></td></tr>
+<tr><td></td><td valign="top"><a href="Guard-Macros.html#index-multiple_002dinclude-optimization">multiple-include optimization</a>:</td><td>&nbsp;</td><td valign="top"><a href="Guard-Macros.html#Guard-Macros">Guard Macros</a></td></tr>
+<tr><td colspan="4"> <hr></td></tr>
+<tr><th><a name="Concept-Index_cp_letter-N">N</a></th><td></td><td></td></tr>
+<tr><td></td><td valign="top"><a href="Hash-Nodes.html#index-named-operators">named operators</a>:</td><td>&nbsp;</td><td valign="top"><a href="Hash-Nodes.html#Hash-Nodes">Hash Nodes</a></td></tr>
+<tr><td></td><td valign="top"><a href="Lexer.html#index-newlines">newlines</a>:</td><td>&nbsp;</td><td valign="top"><a href="Lexer.html#Lexer">Lexer</a></td></tr>
+<tr><td colspan="4"> <hr></td></tr>
+<tr><th><a name="Concept-Index_cp_letter-P">P</a></th><td></td><td></td></tr>
+<tr><td></td><td valign="top"><a href="Token-Spacing.html#index-paste-avoidance">paste avoidance</a>:</td><td>&nbsp;</td><td valign="top"><a href="Token-Spacing.html#Token-Spacing">Token Spacing</a></td></tr>
+<tr><td colspan="4"> <hr></td></tr>
+<tr><th><a name="Concept-Index_cp_letter-S">S</a></th><td></td><td></td></tr>
+<tr><td></td><td valign="top"><a href="Token-Spacing.html#index-spacing">spacing</a>:</td><td>&nbsp;</td><td valign="top"><a href="Token-Spacing.html#Token-Spacing">Token Spacing</a></td></tr>
+<tr><td colspan="4"> <hr></td></tr>
+<tr><th><a name="Concept-Index_cp_letter-T">T</a></th><td></td><td></td></tr>
+<tr><td></td><td valign="top"><a href="Lexer.html#index-token-run">token run</a>:</td><td>&nbsp;</td><td valign="top"><a href="Lexer.html#Lexer">Lexer</a></td></tr>
+<tr><td></td><td valign="top"><a href="Token-Spacing.html#index-token-spacing">token spacing</a>:</td><td>&nbsp;</td><td valign="top"><a href="Token-Spacing.html#Token-Spacing">Token Spacing</a></td></tr>
+<tr><td colspan="4"> <hr></td></tr>
+</table>
+<table><tr><th valign="top">Jump to: &nbsp; </th><td><a class="summary-letter" href="#Concept-Index_cp_letter-A"><b>A</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-C"><b>C</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-E"><b>E</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-F"><b>F</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-G"><b>G</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-H"><b>H</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-I"><b>I</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-L"><b>L</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-M"><b>M</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-N"><b>N</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-P"><b>P</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-S"><b>S</b></a>
+ &nbsp;
+<a class="summary-letter" href="#Concept-Index_cp_letter-T"><b>T</b></a>
+ &nbsp;
+</td></tr></table>
+
+<hr>
+<div class="header">
+<p>
+Previous: <a href="Files.html#Files" accesskey="p" rel="previous">Files</a>, Up: <a href="index.html#Top" accesskey="u" rel="up">Top</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="#Concept-Index" title="Index" rel="index">Index</a>]</p>
+</div>
+
+
+
+</body>
+</html>
diff --git a/share/doc/cppinternals/Conventions.html b/share/doc/cppinternals/Conventions.html
new file mode 100644
index 0000000..f45692a
--- /dev/null
+++ b/share/doc/cppinternals/Conventions.html
@@ -0,0 +1,84 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 5.1, http://www.gnu.org/software/texinfo/ -->
+<head>
+<title>The GNU C Preprocessor Internals: Conventions</title>
+
+<meta name="description" content="The GNU C Preprocessor Internals: Conventions">
+<meta name="keywords" content="The GNU C Preprocessor Internals: Conventions">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<link href="index.html#Top" rel="start" title="Top">
+<link href="Concept-Index.html#Concept-Index" rel="index" title="Concept Index">
+<link href="index.html#SEC_Contents" rel="contents" title="Table of Contents">
+<link href="index.html#Top" rel="up" title="Top">
+<link href="Lexer.html#Lexer" rel="next" title="Lexer">
+<link href="index.html#Top" rel="previous" title="Top">
+<style type="text/css">
+<!--
+a.summary-letter {text-decoration: none}
+blockquote.smallquotation {font-size: smaller}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+div.indentedblock {margin-left: 3.2em}
+div.lisp {margin-left: 3.2em}
+div.smalldisplay {margin-left: 3.2em}
+div.smallexample {margin-left: 3.2em}
+div.smallindentedblock {margin-left: 3.2em; font-size: smaller}
+div.smalllisp {margin-left: 3.2em}
+kbd {font-style:oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+pre.smalldisplay {font-family: inherit; font-size: smaller}
+pre.smallexample {font-size: smaller}
+pre.smallformat {font-family: inherit; font-size: smaller}
+pre.smalllisp {font-size: smaller}
+span.nocodebreak {white-space:nowrap}
+span.nolinebreak {white-space:nowrap}
+span.roman {font-family:serif; font-weight:normal}
+span.sansserif {font-family:sans-serif; font-weight:normal}
+ul.no-bullet {list-style: none}
+-->
+</style>
+
+
+</head>
+
+<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000">
+<a name="Conventions"></a>
+<div class="header">
+<p>
+Next: <a href="Lexer.html#Lexer" accesskey="n" rel="next">Lexer</a>, Previous: <a href="index.html#Top" accesskey="p" rel="previous">Top</a>, Up: <a href="index.html#Top" accesskey="u" rel="up">Top</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Concept-Index.html#Concept-Index" title="Index" rel="index">Index</a>]</p>
+</div>
+<hr>
+<a name="Conventions-1"></a>
+<h2 class="unnumbered">Conventions</h2>
+<a name="index-interface"></a>
+<a name="index-header-files"></a>
+
+<p>cpplib has two interfaces&mdash;one is exposed internally only, and the
+other is for both internal and external use.
+</p>
+<p>The convention is that functions and types that are exposed to multiple
+files internally are prefixed with &lsquo;<samp>_cpp_</samp>&rsquo;, and are to be found in
+the file <samp>internal.h</samp>. Functions and types exposed to external
+clients are in <samp>cpplib.h</samp>, and prefixed with &lsquo;<samp>cpp_</samp>&rsquo;. For
+historical reasons this is no longer quite true, but we should strive to
+stick to it.
+</p>
+<p>We are striving to reduce the information exposed in <samp>cpplib.h</samp> to the
+bare minimum necessary, and then to keep it there. This makes clear
+exactly what external clients are entitled to assume, and allows us to
+change internals in the future without worrying whether library clients
+are perhaps relying on some kind of undocumented implementation-specific
+behavior.
+</p>
+
+
+
+</body>
+</html>
diff --git a/share/doc/cppinternals/Files.html b/share/doc/cppinternals/Files.html
new file mode 100644
index 0000000..68bfa42
--- /dev/null
+++ b/share/doc/cppinternals/Files.html
@@ -0,0 +1,135 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 5.1, http://www.gnu.org/software/texinfo/ -->
+<head>
+<title>The GNU C Preprocessor Internals: Files</title>
+
+<meta name="description" content="The GNU C Preprocessor Internals: Files">
+<meta name="keywords" content="The GNU C Preprocessor Internals: Files">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<link href="index.html#Top" rel="start" title="Top">
+<link href="Concept-Index.html#Concept-Index" rel="index" title="Concept Index">
+<link href="index.html#SEC_Contents" rel="contents" title="Table of Contents">
+<link href="index.html#Top" rel="up" title="Top">
+<link href="Concept-Index.html#Concept-Index" rel="next" title="Concept Index">
+<link href="Guard-Macros.html#Guard-Macros" rel="previous" title="Guard Macros">
+<style type="text/css">
+<!--
+a.summary-letter {text-decoration: none}
+blockquote.smallquotation {font-size: smaller}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+div.indentedblock {margin-left: 3.2em}
+div.lisp {margin-left: 3.2em}
+div.smalldisplay {margin-left: 3.2em}
+div.smallexample {margin-left: 3.2em}
+div.smallindentedblock {margin-left: 3.2em; font-size: smaller}
+div.smalllisp {margin-left: 3.2em}
+kbd {font-style:oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+pre.smalldisplay {font-family: inherit; font-size: smaller}
+pre.smallexample {font-size: smaller}
+pre.smallformat {font-family: inherit; font-size: smaller}
+pre.smalllisp {font-size: smaller}
+span.nocodebreak {white-space:nowrap}
+span.nolinebreak {white-space:nowrap}
+span.roman {font-family:serif; font-weight:normal}
+span.sansserif {font-family:sans-serif; font-weight:normal}
+ul.no-bullet {list-style: none}
+-->
+</style>
+
+
+</head>
+
+<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000">
+<a name="Files"></a>
+<div class="header">
+<p>
+Next: <a href="Concept-Index.html#Concept-Index" accesskey="n" rel="next">Concept Index</a>, Previous: <a href="Guard-Macros.html#Guard-Macros" accesskey="p" rel="previous">Guard Macros</a>, Up: <a href="index.html#Top" accesskey="u" rel="up">Top</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Concept-Index.html#Concept-Index" title="Index" rel="index">Index</a>]</p>
+</div>
+<hr>
+<a name="File-Handling"></a>
+<h2 class="unnumbered">File Handling</h2>
+<a name="index-files"></a>
+
+<p>Fairly obviously, the file handling code of cpplib resides in the file
+<samp>files.cc</samp>. It takes care of the details of file searching,
+opening, reading and caching, for both the main source file and all the
+headers it recursively includes.
+</p>
+<p>The basic strategy is to minimize the number of system calls. On many
+systems, the basic <code>open ()</code> and <code>fstat ()</code> system calls can
+be quite expensive. For every <code>#include</code>-d file, we need to try
+all the directories in the search path until we find a match. Some
+projects, such as glibc, pass twenty or thirty include paths on the
+command line, so this can rapidly become time-consuming.
+</p>
+<p>For a header file we have not encountered before we have little choice
+but to do this. However, it is often the case that the same headers are
+repeatedly included, and in these cases we try to avoid repeating the
+filesystem queries whilst searching for the correct file.
+</p>
+<p>For each file we try to open, we store the constructed path in a splay
+tree. This path first undergoes simplification by the function
+<code>_cpp_simplify_pathname</code>. For example,
+<samp>/usr/include/bits/../foo.h</samp> is simplified to
+<samp>/usr/include/foo.h</samp> before we enter it in the splay tree and try
+to <code>open ()</code> the file. CPP will then find subsequent uses of
+<samp>foo.h</samp>, even as <samp>/usr/include/foo.h</samp>, in the splay tree and
+save system calls.
+</p>
+<p>Further, it is likely the file contents have also been cached, saving a
+<code>read ()</code> system call. We don&rsquo;t bother caching the contents of
+header files that are re-inclusion protected, and whose re-inclusion
+macro is defined when we leave the header file for the first time. If
+the host supports it, we try to map suitably large files into memory,
+rather than reading them in directly.
+</p>
+<p>The include paths are internally stored on a null-terminated
+singly-linked list, starting with the <code>&quot;header.h&quot;</code> directory search
+chain, which then links into the <code>&lt;header.h&gt;</code> directory chain.
+</p>
+<p>Files included with the <code>&lt;foo.h&gt;</code> syntax start the lookup directly
+in the second half of this chain. However, files included with the
+<code>&quot;foo.h&quot;</code> syntax start at the beginning of the chain, but with one
+extra directory prepended. This is the directory of the current file;
+the one containing the <code>#include</code> directive. Prepending this
+directory on a per-file basis is handled by the function
+<code>search_from</code>.
+</p>
+<p>Note that a header included with a directory component, such as
+<code>#include &quot;mydir/foo.h&quot;</code> and opened as
+<samp>/usr/local/include/mydir/foo.h</samp>, will have the complete path minus
+the basename &lsquo;<samp>foo.h</samp>&rsquo; as the current directory.
+</p>
+<p>Enough information is stored in the splay tree that CPP can immediately
+tell whether it can skip the header file because of the multiple include
+optimization, whether the file didn&rsquo;t exist or couldn&rsquo;t be opened for
+some reason, or whether the header was flagged not to be re-used, as it
+is with the obsolete <code>#import</code> directive.
+</p>
+<p>For the benefit of MS-DOS filesystems with an 8.3 filename limitation,
+CPP offers the ability to treat various include file names as aliases
+for the real header files with shorter names. The map from one to the
+other is found in a special file called &lsquo;<samp>header.gcc</samp>&rsquo;, stored in the
+command line (or system) include directories to which the mapping
+applies. This may be higher up the directory tree than the full path to
+the file minus the base name.
+</p>
+<hr>
+<div class="header">
+<p>
+Next: <a href="Concept-Index.html#Concept-Index" accesskey="n" rel="next">Concept Index</a>, Previous: <a href="Guard-Macros.html#Guard-Macros" accesskey="p" rel="previous">Guard Macros</a>, Up: <a href="index.html#Top" accesskey="u" rel="up">Top</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Concept-Index.html#Concept-Index" title="Index" rel="index">Index</a>]</p>
+</div>
+
+
+
+</body>
+</html>
diff --git a/share/doc/cppinternals/Guard-Macros.html b/share/doc/cppinternals/Guard-Macros.html
new file mode 100644
index 0000000..72bd485
--- /dev/null
+++ b/share/doc/cppinternals/Guard-Macros.html
@@ -0,0 +1,188 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 5.1, http://www.gnu.org/software/texinfo/ -->
+<head>
+<title>The GNU C Preprocessor Internals: Guard Macros</title>
+
+<meta name="description" content="The GNU C Preprocessor Internals: Guard Macros">
+<meta name="keywords" content="The GNU C Preprocessor Internals: Guard Macros">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<link href="index.html#Top" rel="start" title="Top">
+<link href="Concept-Index.html#Concept-Index" rel="index" title="Concept Index">
+<link href="index.html#SEC_Contents" rel="contents" title="Table of Contents">
+<link href="index.html#Top" rel="up" title="Top">
+<link href="Files.html#Files" rel="next" title="Files">
+<link href="Line-Numbering.html#Line-Numbering" rel="previous" title="Line Numbering">
+<style type="text/css">
+<!--
+a.summary-letter {text-decoration: none}
+blockquote.smallquotation {font-size: smaller}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+div.indentedblock {margin-left: 3.2em}
+div.lisp {margin-left: 3.2em}
+div.smalldisplay {margin-left: 3.2em}
+div.smallexample {margin-left: 3.2em}
+div.smallindentedblock {margin-left: 3.2em; font-size: smaller}
+div.smalllisp {margin-left: 3.2em}
+kbd {font-style:oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+pre.smalldisplay {font-family: inherit; font-size: smaller}
+pre.smallexample {font-size: smaller}
+pre.smallformat {font-family: inherit; font-size: smaller}
+pre.smalllisp {font-size: smaller}
+span.nocodebreak {white-space:nowrap}
+span.nolinebreak {white-space:nowrap}
+span.roman {font-family:serif; font-weight:normal}
+span.sansserif {font-family:sans-serif; font-weight:normal}
+ul.no-bullet {list-style: none}
+-->
+</style>
+
+
+</head>
+
+<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000">
+<a name="Guard-Macros"></a>
+<div class="header">
+<p>
+Next: <a href="Files.html#Files" accesskey="n" rel="next">Files</a>, Previous: <a href="Line-Numbering.html#Line-Numbering" accesskey="p" rel="previous">Line Numbering</a>, Up: <a href="index.html#Top" accesskey="u" rel="up">Top</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Concept-Index.html#Concept-Index" title="Index" rel="index">Index</a>]</p>
+</div>
+<hr>
+<a name="The-Multiple_002dInclude-Optimization"></a>
+<h2 class="unnumbered">The Multiple-Include Optimization</h2>
+<a name="index-guard-macros"></a>
+<a name="index-controlling-macros"></a>
+<a name="index-multiple_002dinclude-optimization"></a>
+
+<p>Header files are often of the form
+</p>
+<div class="smallexample">
+<pre class="smallexample">#ifndef FOO
+#define FOO
+&hellip;
+#endif
+</pre></div>
+
+<p>to prevent the compiler from processing them more than once. The
+preprocessor notices such header files, so that if the header file
+appears in a subsequent <code>#include</code> directive and <code>FOO</code> is
+defined, then the directive is ignored and the preprocessor doesn&rsquo;t preprocess
+or even re-open the file a second time. This is referred to as the <em>multiple
+include optimization</em>.
+</p>
+<p>Under what circumstances is such an optimization valid? If the file
+were included a second time, it can only be optimized away if that
+inclusion would result in no tokens to return, and no relevant
+directives to process. Therefore the current implementation imposes
+requirements and makes some allowances as follows:
+</p>
+<ol>
+<li> There must be no tokens outside the controlling <code>#if</code>-<code>#endif</code>
+pair, but whitespace and comments are permitted.
+
+</li><li> There must be no directives outside the controlling directive pair, but
+the <em>null directive</em> (a line containing nothing other than a single
+&lsquo;<samp>#</samp>&rsquo; and possibly whitespace) is permitted.
+
+</li><li> The opening directive must be of the form
+
+<div class="smallexample">
+<pre class="smallexample">#ifndef FOO
+</pre></div>
+
+<p>or
+</p>
+<div class="smallexample">
+<pre class="smallexample">#if !defined FOO [equivalently, #if !defined(FOO)]
+</pre></div>
+
+</li><li> In the second form above, the tokens forming the <code>#if</code> expression
+must have come directly from the source file&mdash;no macro expansion may
+have been involved. This is because macro definitions can change, and
+tracking whether or not a relevant change has been made is not worth the
+implementation cost.
+
+</li><li> There can be no <code>#else</code> or <code>#elif</code> directives at the outer
+conditional block level, because they would probably contain something
+of interest to a subsequent pass.
+</li></ol>
+
+<p>First, when pushing a new file on the buffer stack,
+<code>stack_include_file</code> sets the controlling macro <code>mi_cmacro</code> to
+<code>NULL</code>, and sets <code>mi_valid</code> to <code>true</code>. This indicates
+that the preprocessor has not yet encountered anything that would
+invalidate the multiple-include optimization. As described in the next
+few paragraphs, these two variables having these values effectively
+indicates top-of-file.
+</p>
+<p>When about to return a token that is not part of a directive,
+<code>_cpp_lex_token</code> sets <code>mi_valid</code> to <code>false</code>. This
+enforces the constraint that tokens outside the controlling conditional
+block invalidate the optimization.
+</p>
+<p>The <code>do_if</code>, when appropriate, and <code>do_ifndef</code> directive
+handlers pass the controlling macro to the function
+<code>push_conditional</code>. cpplib maintains a stack of nested conditional
+blocks, and after processing every opening conditional this function
+pushes an <code>if_stack</code> structure onto the stack. In this structure
+it records the controlling macro for the block, provided there is one
+and we&rsquo;re at top-of-file (as described above). If an <code>#elif</code> or
+<code>#else</code> directive is encountered, the controlling macro for that
+block is cleared to <code>NULL</code>. Otherwise, it survives until the
+<code>#endif</code> closing the block, upon which <code>do_endif</code> sets
+<code>mi_valid</code> to <code>true</code> and stores the controlling macro in
+<code>mi_cmacro</code>.
+</p>
+<p><code>_cpp_handle_directive</code> clears <code>mi_valid</code> when processing any
+directive other than an opening conditional and the null directive.
+With this, and requiring top-of-file to record a controlling macro, and
+no <code>#else</code> or <code>#elif</code> for it to survive and be copied to
+<code>mi_cmacro</code> by <code>do_endif</code>, we have enforced the absence of
+directives outside the main conditional block for the optimization to be
+on.
+</p>
+<p>Note that whilst we are inside the conditional block, <code>mi_valid</code> is
+likely to be reset to <code>false</code>, but this does not matter since
+the closing <code>#endif</code> restores it to <code>true</code> if appropriate.
+</p>
+<p>Finally, since <code>_cpp_lex_direct</code> pops the file off the buffer stack
+at <code>EOF</code> without returning a token, if the <code>#endif</code> directive
+was not followed by any tokens, <code>mi_valid</code> is <code>true</code> and
+<code>_cpp_pop_file_buffer</code> remembers the controlling macro associated
+with the file. Subsequent calls to <code>stack_include_file</code> result in
+no buffer being pushed if the controlling macro is defined, effecting
+the optimization.
+</p>
+<p>A quick word on how we handle the
+</p>
+<div class="smallexample">
+<pre class="smallexample">#if !defined FOO
+</pre></div>
+
+<p>case. <code>_cpp_parse_expr</code> and <code>parse_defined</code> take steps to see
+whether the three stages &lsquo;<samp>!</samp>&rsquo;, &lsquo;<samp>defined-expression</samp>&rsquo; and
+&lsquo;<samp>end-of-directive</samp>&rsquo; occur in order in a <code>#if</code> expression. If
+so, they return the guard macro to <code>do_if</code> in the variable
+<code>mi_ind_cmacro</code>, and otherwise set it to <code>NULL</code>.
+<code>enter_macro_context</code> sets <code>mi_valid</code> to <code>false</code>, so if a macro
+was expanded whilst parsing any part of the expression, then the
+top-of-file test in <code>push_conditional</code> fails and the optimization
+is turned off.
+</p>
+<hr>
+<div class="header">
+<p>
+Next: <a href="Files.html#Files" accesskey="n" rel="next">Files</a>, Previous: <a href="Line-Numbering.html#Line-Numbering" accesskey="p" rel="previous">Line Numbering</a>, Up: <a href="index.html#Top" accesskey="u" rel="up">Top</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Concept-Index.html#Concept-Index" title="Index" rel="index">Index</a>]</p>
+</div>
+
+
+
+</body>
+</html>
diff --git a/share/doc/cppinternals/Hash-Nodes.html b/share/doc/cppinternals/Hash-Nodes.html
new file mode 100644
index 0000000..2d1f2a2
--- /dev/null
+++ b/share/doc/cppinternals/Hash-Nodes.html
@@ -0,0 +1,130 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 5.1, http://www.gnu.org/software/texinfo/ -->
+<head>
+<title>The GNU C Preprocessor Internals: Hash Nodes</title>
+
+<meta name="description" content="The GNU C Preprocessor Internals: Hash Nodes">
+<meta name="keywords" content="The GNU C Preprocessor Internals: Hash Nodes">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<link href="index.html#Top" rel="start" title="Top">
+<link href="Concept-Index.html#Concept-Index" rel="index" title="Concept Index">
+<link href="index.html#SEC_Contents" rel="contents" title="Table of Contents">
+<link href="index.html#Top" rel="up" title="Top">
+<link href="Macro-Expansion.html#Macro-Expansion" rel="next" title="Macro Expansion">
+<link href="Lexer.html#Lexer" rel="previous" title="Lexer">
+<style type="text/css">
+<!--
+a.summary-letter {text-decoration: none}
+blockquote.smallquotation {font-size: smaller}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+div.indentedblock {margin-left: 3.2em}
+div.lisp {margin-left: 3.2em}
+div.smalldisplay {margin-left: 3.2em}
+div.smallexample {margin-left: 3.2em}
+div.smallindentedblock {margin-left: 3.2em; font-size: smaller}
+div.smalllisp {margin-left: 3.2em}
+kbd {font-style:oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+pre.smalldisplay {font-family: inherit; font-size: smaller}
+pre.smallexample {font-size: smaller}
+pre.smallformat {font-family: inherit; font-size: smaller}
+pre.smalllisp {font-size: smaller}
+span.nocodebreak {white-space:nowrap}
+span.nolinebreak {white-space:nowrap}
+span.roman {font-family:serif; font-weight:normal}
+span.sansserif {font-family:sans-serif; font-weight:normal}
+ul.no-bullet {list-style: none}
+-->
+</style>
+
+
+</head>
+
+<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000">
+<a name="Hash-Nodes"></a>
+<div class="header">
+<p>
+Next: <a href="Macro-Expansion.html#Macro-Expansion" accesskey="n" rel="next">Macro Expansion</a>, Previous: <a href="Lexer.html#Lexer" accesskey="p" rel="previous">Lexer</a>, Up: <a href="index.html#Top" accesskey="u" rel="up">Top</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Concept-Index.html#Concept-Index" title="Index" rel="index">Index</a>]</p>
+</div>
+<hr>
+<a name="Hash-Nodes-1"></a>
+<h2 class="unnumbered">Hash Nodes</h2>
+<a name="index-hash-table"></a>
+<a name="index-identifiers"></a>
+<a name="index-macros"></a>
+<a name="index-assertions"></a>
+<a name="index-named-operators"></a>
+
+<p>When cpplib encounters an &ldquo;identifier&rdquo;, it generates a hash code for
+it and stores it in the hash table. By &ldquo;identifier&rdquo; we mean tokens
+with type <code>CPP_NAME</code>; this includes identifiers in the usual C
+sense, as well as keywords, directive names, macro names and so on. For
+example, all of <code>pragma</code>, <code>int</code>, <code>foo</code> and
+<code>__GNUC__</code> are identifiers and hashed when lexed.
+</p>
+<p>Each node in the hash table contains various information about the
+identifier it represents, for example its length and type. At any one
+time, each identifier falls into exactly one of three categories:
+</p>
+<ul>
+<li> Macros
+
+<p>These have been declared to be macros, either on the command line or
+with <code>#define</code>. A few, such as <code>__TIME__</code> are built-ins
+entered in the hash table during initialization. The hash node for a
+normal macro points to a structure with more information about the
+macro, such as whether it is function-like, how many arguments it takes,
+and its expansion. Built-in macros are flagged as special, and instead
+contain an enum indicating which of the various built-in macros it is.
+</p>
+</li><li> Assertions
+
+<p>Assertions are in a separate namespace to macros. To enforce this, cpp
+actually prepends a <code>#</code> character before hashing and entering it in
+the hash table. An assertion&rsquo;s node points to a chain of answers to
+that assertion.
+</p>
+</li><li> Void
+
+<p>Everything else falls into this category&mdash;an identifier that is not
+currently a macro, or a macro that has since been undefined with
+<code>#undef</code>.
+</p>
+<p>When preprocessing C++, this category also includes the named operators,
+such as <code>xor</code>. In expressions these behave like the operators they
+represent, but in contexts where the spelling of a token matters they
+are spelt differently. This spelling distinction is relevant when they
+are operands of the stringizing and pasting macro operators <code>#</code> and
+<code>##</code>. Named operator hash nodes are flagged, both to catch the
+spelling distinction and to prevent them from being defined as macros.
+</p></li></ul>
+
+<p>The same identifiers share the same hash node. Since each identifier
+token, after lexing, contains a pointer to its hash node, this is used
+to provide rapid lookup of various information. For example, when
+parsing a <code>#define</code> statement, CPP flags each argument&rsquo;s identifier
+hash node with the index of that argument. This makes duplicated
+argument checking an O(1) operation for each argument. Similarly, for
+each identifier in the macro&rsquo;s expansion, lookup to see if it is an
+argument, and which argument it is, is also an O(1) operation. Further,
+each directive name, such as <code>endif</code>, has an associated directive
+enum stored in its hash node, so that directive lookup is also O(1).
+</p>
+<hr>
+<div class="header">
+<p>
+Next: <a href="Macro-Expansion.html#Macro-Expansion" accesskey="n" rel="next">Macro Expansion</a>, Previous: <a href="Lexer.html#Lexer" accesskey="p" rel="previous">Lexer</a>, Up: <a href="index.html#Top" accesskey="u" rel="up">Top</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Concept-Index.html#Concept-Index" title="Index" rel="index">Index</a>]</p>
+</div>
+
+
+
+</body>
+</html>
diff --git a/share/doc/cppinternals/Invalid-identifiers.html b/share/doc/cppinternals/Invalid-identifiers.html
new file mode 100644
index 0000000..b201d55
--- /dev/null
+++ b/share/doc/cppinternals/Invalid-identifiers.html
@@ -0,0 +1,50 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 5.1, http://www.gnu.org/software/texinfo/ -->
+<!-- This file redirects to the location of a node or anchor -->
+<head>
+<title>The GNU C Preprocessor Internals: Invalid identifiers</title>
+
+<meta name="description" content="The GNU C Preprocessor Internals: Invalid identifiers">
+<meta name="keywords" content="The GNU C Preprocessor Internals: Invalid identifiers">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<style type="text/css">
+<!--
+a.summary-letter {text-decoration: none}
+blockquote.smallquotation {font-size: smaller}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+div.indentedblock {margin-left: 3.2em}
+div.lisp {margin-left: 3.2em}
+div.smalldisplay {margin-left: 3.2em}
+div.smallexample {margin-left: 3.2em}
+div.smallindentedblock {margin-left: 3.2em; font-size: smaller}
+div.smalllisp {margin-left: 3.2em}
+kbd {font-style:oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+pre.smalldisplay {font-family: inherit; font-size: smaller}
+pre.smallexample {font-size: smaller}
+pre.smallformat {font-family: inherit; font-size: smaller}
+pre.smalllisp {font-size: smaller}
+span.nocodebreak {white-space:nowrap}
+span.nolinebreak {white-space:nowrap}
+span.roman {font-family:serif; font-weight:normal}
+span.sansserif {font-family:sans-serif; font-weight:normal}
+ul.no-bullet {list-style: none}
+-->
+</style>
+
+<meta http-equiv="Refresh" content="0; url=Lexer.html#Invalid-identifiers">
+
+</head>
+
+<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000">
+
+<p>The node you are looking for is at <a href="Lexer.html#Invalid-identifiers">Invalid identifiers</a>.</p>
+</body>
diff --git a/share/doc/cppinternals/Lexer.html b/share/doc/cppinternals/Lexer.html
new file mode 100644
index 0000000..51465a1
--- /dev/null
+++ b/share/doc/cppinternals/Lexer.html
@@ -0,0 +1,346 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 5.1, http://www.gnu.org/software/texinfo/ -->
+<head>
+<title>The GNU C Preprocessor Internals: Lexer</title>
+
+<meta name="description" content="The GNU C Preprocessor Internals: Lexer">
+<meta name="keywords" content="The GNU C Preprocessor Internals: Lexer">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<link href="index.html#Top" rel="start" title="Top">
+<link href="Concept-Index.html#Concept-Index" rel="index" title="Concept Index">
+<link href="index.html#SEC_Contents" rel="contents" title="Table of Contents">
+<link href="index.html#Top" rel="up" title="Top">
+<link href="Hash-Nodes.html#Hash-Nodes" rel="next" title="Hash Nodes">
+<link href="Conventions.html#Conventions" rel="previous" title="Conventions">
+<style type="text/css">
+<!--
+a.summary-letter {text-decoration: none}
+blockquote.smallquotation {font-size: smaller}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+div.indentedblock {margin-left: 3.2em}
+div.lisp {margin-left: 3.2em}
+div.smalldisplay {margin-left: 3.2em}
+div.smallexample {margin-left: 3.2em}
+div.smallindentedblock {margin-left: 3.2em; font-size: smaller}
+div.smalllisp {margin-left: 3.2em}
+kbd {font-style:oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+pre.smalldisplay {font-family: inherit; font-size: smaller}
+pre.smallexample {font-size: smaller}
+pre.smallformat {font-family: inherit; font-size: smaller}
+pre.smalllisp {font-size: smaller}
+span.nocodebreak {white-space:nowrap}
+span.nolinebreak {white-space:nowrap}
+span.roman {font-family:serif; font-weight:normal}
+span.sansserif {font-family:sans-serif; font-weight:normal}
+ul.no-bullet {list-style: none}
+-->
+</style>
+
+
+</head>
+
+<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000">
+<a name="Lexer"></a>
+<div class="header">
+<p>
+Next: <a href="Hash-Nodes.html#Hash-Nodes" accesskey="n" rel="next">Hash Nodes</a>, Previous: <a href="Conventions.html#Conventions" accesskey="p" rel="previous">Conventions</a>, Up: <a href="index.html#Top" accesskey="u" rel="up">Top</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Concept-Index.html#Concept-Index" title="Index" rel="index">Index</a>]</p>
+</div>
+<hr>
+<a name="The-Lexer"></a>
+<h2 class="unnumbered">The Lexer</h2>
+<a name="index-lexer"></a>
+<a name="index-newlines"></a>
+<a name="index-escaped-newlines"></a>
+
+<a name="Overview"></a>
+<h3 class="section">Overview</h3>
+<p>The lexer is contained in the file <samp>lex.cc</samp>. It is a hand-coded
+lexer, and not implemented as a state machine. It can understand C, C++
+and Objective-C source code, and has been extended to allow reasonably
+successful preprocessing of assembly language. The lexer does not make
+an initial pass to strip out trigraphs and escaped newlines, but handles
+them as they are encountered in a single pass of the input file. It
+returns preprocessing tokens individually, not a line at a time.
+</p>
+<p>It is mostly transparent to users of the library, since the library&rsquo;s
+interface for obtaining the next token, <code>cpp_get_token</code>, takes care
+of lexing new tokens, handling directives, and expanding macros as
+necessary. However, the lexer does expose some functionality so that
+clients of the library can easily spell a given token, such as
+<code>cpp_spell_token</code> and <code>cpp_token_len</code>. These functions are
+useful when generating diagnostics, and for emitting the preprocessed
+output.
+</p>
+<a name="Lexing-a-token"></a>
+<h3 class="section">Lexing a token</h3>
+<p>Lexing of an individual token is handled by <code>_cpp_lex_direct</code> and
+its subroutines. In its current form the code is quite complicated,
+with read ahead characters and such-like, since it strives to not step
+back in the character stream in preparation for handling non-ASCII file
+encodings. The current plan is to convert any such files to UTF-8
+before processing them. This complexity is therefore unnecessary and
+will be removed, so I&rsquo;ll not discuss it further here.
+</p>
+<p>The job of <code>_cpp_lex_direct</code> is simply to lex a token. It is not
+responsible for issues like directive handling, returning lookahead
+tokens directly, multiple-include optimization, or conditional block
+skipping. It necessarily has a minor r&ocirc;le to play in memory
+management of lexed lines. I discuss these issues in a separate section
+(see <a href="#Lexing-a-line">Lexing a line</a>).
+</p>
+<p>The lexer places the token it lexes into storage pointed to by the
+variable <code>cur_token</code>, and then increments it. This variable is
+important for correct diagnostic positioning. Unless a specific line
+and column are passed to the diagnostic routines, they will examine the
+<code>line</code> and <code>col</code> values of the token just before the location
+that <code>cur_token</code> points to, and use that location to report the
+diagnostic.
+</p>
+<p>The lexer does not consider whitespace to be a token in its own right.
+If whitespace (other than a new line) precedes a token, it sets the
+<code>PREV_WHITE</code> bit in the token&rsquo;s flags. Each token has its
+<code>line</code> and <code>col</code> variables set to the line and column of the
+first character of the token. This line number is the line number in
+the translation unit, and can be converted to a source (file, line) pair
+using the line map code.
+</p>
+<p>The first token on a logical, i.e. unescaped, line has the flag
+<code>BOL</code> set for beginning-of-line. This flag is intended for
+internal use, both to distinguish a &lsquo;<samp>#</samp>&rsquo; that begins a directive
+from one that doesn&rsquo;t, and to generate a call-back to clients that want
+to be notified about the start of every non-directive line with tokens
+on it. Clients cannot reliably determine this for themselves: the first
+token might be a macro, and the tokens of a macro expansion do not have
+the <code>BOL</code> flag set. The macro expansion may even be empty, and the
+next token on the line certainly won&rsquo;t have the <code>BOL</code> flag set.
+</p>
+<p>New lines are treated specially; exactly how the lexer handles them is
+context-dependent. The C standard mandates that directives are
+terminated by the first unescaped newline character, even if it appears
+in the middle of a macro expansion. Therefore, if the state variable
+<code>in_directive</code> is set, the lexer returns a <code>CPP_EOF</code> token,
+which is normally used to indicate end-of-file, to indicate
+end-of-directive. In a directive a <code>CPP_EOF</code> token never means
+end-of-file. Conveniently, if the caller was <code>collect_args</code>, it
+already handles <code>CPP_EOF</code> as if it were end-of-file, and reports an
+error about an unterminated macro argument list.
+</p>
+<p>The C standard also specifies that a new line in the middle of the
+arguments to a macro is treated as whitespace. This white space is
+important in case the macro argument is stringized. The state variable
+<code>parsing_args</code> is nonzero when the preprocessor is collecting the
+arguments to a macro call. It is set to 1 when looking for the opening
+parenthesis to a function-like macro, and 2 when collecting the actual
+arguments up to the closing parenthesis, since these two cases need to
+be distinguished sometimes. One such time is here: the lexer sets the
+<code>PREV_WHITE</code> flag of a token if it meets a new line when
+<code>parsing_args</code> is set to 2. It doesn&rsquo;t set it if it meets a new
+line when <code>parsing_args</code> is 1, since then code like
+</p>
+<div class="smallexample">
+<pre class="smallexample">#define foo() bar
+foo
+baz
+</pre></div>
+
+<p>would be output with an erroneous space before &lsquo;<samp>baz</samp>&rsquo;:
+</p>
+<div class="smallexample">
+<pre class="smallexample">foo
+ baz
+</pre></div>
+
+<p>This is a good example of the subtlety of getting token spacing correct
+in the preprocessor; there are plenty of tests in the testsuite for
+corner cases like this.
+</p>
+<p>The lexer is written to treat each of &lsquo;<samp>\r</samp>&rsquo;, &lsquo;<samp>\n</samp>&rsquo;, &lsquo;<samp>\r\n</samp>&rsquo;
+and &lsquo;<samp>\n\r</samp>&rsquo; as a single new line indicator. This allows it to
+transparently preprocess MS-DOS, Macintosh and Unix files without their
+needing to pass through a special filter beforehand.
+</p>
+<p>We also decided to treat a backslash, either &lsquo;<samp>\</samp>&rsquo; or the trigraph
+&lsquo;<samp>??/</samp>&rsquo;, separated from one of the above newline indicators by
+non-comment whitespace only, as intending to escape the newline. It
+tends to be a typing mistake, and cannot reasonably be mistaken for
+anything else in any of the C-family grammars. Since handling it this
+way is not strictly conforming to the ISO standard, the library issues a
+warning wherever it encounters it.
+</p>
+<p>Handling newlines like this is made simpler by doing it in one place
+only. The function <code>handle_newline</code> takes care of all newline
+characters, and <code>skip_escaped_newlines</code> takes care of arbitrarily
+long sequences of escaped newlines, deferring to <code>handle_newline</code>
+to handle the newlines themselves.
+</p>
+<p>The most painful aspect of lexing ISO-standard C and C++ is handling
+trigraphs and backslash-escaped newlines. Trigraphs are processed before
+any interpretation of the meaning of a character is made, and unfortunately
+there is a trigraph representation for a backslash, so it is possible for
+the trigraph &lsquo;<samp>??/</samp>&rsquo; to introduce an escaped newline.
+</p>
+<p>Escaped newlines are tedious because theoretically they can occur
+anywhere&mdash;between the &lsquo;<samp>+</samp>&rsquo; and &lsquo;<samp>=</samp>&rsquo; of the &lsquo;<samp>+=</samp>&rsquo; token,
+within the characters of an identifier, and even between the &lsquo;<samp>*</samp>&rsquo;
+and &lsquo;<samp>/</samp>&rsquo; that terminates a comment. Moreover, you cannot be sure
+there is just one&mdash;there might be an arbitrarily long sequence of them.
+</p>
+<p>So, for example, the routine that lexes a number, <code>parse_number</code>,
+cannot assume that it can scan forwards until the first non-number
+character and be done with it, because this could be the &lsquo;<samp>\</samp>&rsquo;
+introducing an escaped newline, or the &lsquo;<samp>?</samp>&rsquo; introducing the trigraph
+sequence that represents the &lsquo;<samp>\</samp>&rsquo; of an escaped newline. If it
+encounters a &lsquo;<samp>?</samp>&rsquo; or &lsquo;<samp>\</samp>&rsquo;, it calls <code>skip_escaped_newlines</code>
+to skip over any potential escaped newlines before checking whether the
+number has been finished.
+</p>
+<p>Similarly, code in the main body of <code>_cpp_lex_direct</code> cannot simply
+check for a &lsquo;<samp>=</samp>&rsquo; after a &lsquo;<samp>+</samp>&rsquo; character to determine whether it
+has a &lsquo;<samp>+=</samp>&rsquo; token; it needs to be prepared for an escaped newline of
+some sort. Such cases use the function <code>get_effective_char</code>, which
+returns the first character after any intervening escaped newlines.
+</p>
+<p>The lexer needs to keep track of the correct column position, including
+counting tabs as specified by the <samp>-ftabstop=</samp> option. This
+should be done even within C-style comments; they can appear in the
+middle of a line, and we want to report diagnostics in the correct
+position for text appearing after the end of the comment.
+</p>
+<a name="Invalid-identifiers"></a><p>Some identifiers, such as <code>__VA_ARGS__</code> and poisoned identifiers,
+may be invalid and require a diagnostic. However, if they appear in a
+macro expansion we don&rsquo;t want to complain with each use of the macro.
+It is therefore best to catch them during the lexing stage, in
+<code>parse_identifier</code>. In both cases, whether a diagnostic is needed
+or not is dependent upon the lexer&rsquo;s state. For example, we don&rsquo;t want
+to issue a diagnostic for re-poisoning a poisoned identifier, or for
+using <code>__VA_ARGS__</code> in the expansion of a variable-argument macro.
+Therefore <code>parse_identifier</code> makes use of state flags to determine
+whether a diagnostic is appropriate. Since we change state on a
+per-token basis, and don&rsquo;t lex whole lines at a time, this is not a
+problem.
+</p>
+<p>Another place where state flags are used to change behavior is whilst
+lexing header names. Normally, a &lsquo;<samp>&lt;</samp>&rsquo; would be lexed as a single
+token. After a <code>#include</code> directive, though, it should be lexed as
+a single token as far as the nearest &lsquo;<samp>&gt;</samp>&rsquo; character. Note that we
+don&rsquo;t allow the terminators of header names to be escaped; the first
+&lsquo;<samp>&quot;</samp>&rsquo; or &lsquo;<samp>&gt;</samp>&rsquo; terminates the header name.
+</p>
+<p>Interpretation of some character sequences depends upon whether we are
+lexing C, C++ or Objective-C, and on the revision of the standard in
+force. For example, &lsquo;<samp>::</samp>&rsquo; is a single token in C++, but in C it is
+two separate &lsquo;<samp>:</samp>&rsquo; tokens and almost certainly a syntax error. Such
+cases are handled by <code>_cpp_lex_direct</code> based upon command-line
+flags stored in the <code>cpp_options</code> structure.
+</p>
+<p>Once a token has been lexed, it leads an independent existence. The
+spelling of numbers, identifiers and strings is copied to permanent
+storage from the original input buffer, so a token remains valid and
+correct even if its source buffer is freed with <code>_cpp_pop_buffer</code>.
+The storage holding the spellings of such tokens remains until the
+client program calls <code>cpp_destroy</code>, probably at the end of the translation
+unit.
+</p>
+<a name="Lexing-a-line"></a><a name="Lexing-a-line-1"></a>
+<h3 class="section">Lexing a line</h3>
+<a name="index-token-run"></a>
+
+<p>When the preprocessor was changed to return pointers to tokens, one
+feature I wanted was some sort of guarantee regarding how long a
+returned pointer remains valid. This is important to the stand-alone
+preprocessor, the future direction of the C family front ends, and even
+to cpplib itself internally.
+</p>
+<p>Occasionally the preprocessor wants to be able to peek ahead in the
+token stream. For example, after the name of a function-like macro, it
+wants to check the next token to see if it is an opening parenthesis.
+Another example is that, after reading the first few tokens of a
+<code>#pragma</code> directive and not recognizing it as a registered pragma,
+it wants to backtrack and allow the user-defined handler for unknown
+pragmas to access the full <code>#pragma</code> token stream. The stand-alone
+preprocessor wants to be able to test the current token with the
+previous one to see if a space needs to be inserted to preserve their
+separate tokenization upon re-lexing (paste avoidance), so it needs to
+be sure the pointer to the previous token is still valid. The
+recursive-descent C++ parser wants to be able to perform tentative
+parsing arbitrarily far ahead in the token stream, and then to be able
+to jump back to a prior position in that stream if necessary.
+</p>
+<p>The rule I chose, which is fairly natural, is to arrange that the
+preprocessor lex all tokens on a line consecutively into a token buffer,
+which I call a <em>token run</em>, and when meeting an unescaped new line
+(newlines within comments do not count either), to start lexing back at
+the beginning of the run. Note that we do <em>not</em> lex a line of
+tokens at once; if we did that <code>parse_identifier</code> would not have
+state flags available to warn about invalid identifiers (see <a href="#Invalid-identifiers">Invalid identifiers</a>).
+</p>
+<p>In other words, accessing tokens that appeared earlier in the current
+line is valid, but since each logical line overwrites the tokens of the
+previous line, tokens from prior lines are unavailable. In particular,
+since a directive only occupies a single logical line, this means that
+the directive handlers like the <code>#pragma</code> handler can jump around
+in the directive&rsquo;s tokens if necessary.
+</p>
+<p>Two issues remain: what about tokens that arise from macro expansions,
+and what happens when we have a long line that overflows the token run?
+</p>
+<p>Since we promise clients that we preserve the validity of pointers that
+we have already returned for tokens that appeared earlier in the line,
+we cannot reallocate the run. Instead, on overflow it is expanded by
+chaining a new token run on to the end of the existing one.
+</p>
+<p>The tokens forming a macro&rsquo;s replacement list are collected by the
+<code>#define</code> handler, and placed in storage that is only freed by
+<code>cpp_destroy</code>. So if a macro is expanded in the line of tokens,
+the pointers to the tokens of its expansion that are returned will always
+remain valid. However, macros are a little trickier than that, since
+they give rise to three sources of fresh tokens. They are the built-in
+macros like <code>__LINE__</code>, and the &lsquo;<samp>#</samp>&rsquo; and &lsquo;<samp>##</samp>&rsquo; operators
+for stringizing and token pasting. I handled this by allocating
+space for these tokens from the lexer&rsquo;s token run chain. This means
+they automatically receive the same lifetime guarantees as lexed tokens,
+and we don&rsquo;t need to concern ourselves with freeing them.
+</p>
+<p>Lexing into a line of tokens solves some of the token memory management
+issues, but not all. The opening parenthesis after a function-like
+macro name might lie on a different line, and the front ends definitely
+want the ability to look ahead past the end of the current line. So
+cpplib only moves back to the start of the token run at the end of a
+line if the variable <code>keep_tokens</code> is zero. Line-buffering is
+quite natural for the preprocessor, and as a result the only time cpplib
+needs to increment this variable is whilst looking for the opening
+parenthesis to, and reading the arguments of, a function-like macro. In
+the near future cpplib will export an interface to increment and
+decrement this variable, so that clients can share full control over the
+lifetime of token pointers too.
+</p>
+<p>The routine <code>_cpp_lex_token</code> handles moving to new token runs,
+calling <code>_cpp_lex_direct</code> to lex new tokens, or returning
+previously-lexed tokens if we stepped back in the token stream. It also
+checks each token for the <code>BOL</code> flag, which might indicate a
+directive that needs to be handled, or require a start-of-line call-back
+to be made. <code>_cpp_lex_token</code> also handles skipping over tokens in
+failed conditional blocks, and invalidates the control macro of the
+multiple-include optimization if a token was successfully lexed outside
+a directive. In other words, its callers do not need to concern
+themselves with such issues.
+</p>
+<hr>
+<div class="header">
+<p>
+Next: <a href="Hash-Nodes.html#Hash-Nodes" accesskey="n" rel="next">Hash Nodes</a>, Previous: <a href="Conventions.html#Conventions" accesskey="p" rel="previous">Conventions</a>, Up: <a href="index.html#Top" accesskey="u" rel="up">Top</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Concept-Index.html#Concept-Index" title="Index" rel="index">Index</a>]</p>
+</div>
+
+
+
+</body>
+</html>
diff --git a/share/doc/cppinternals/Lexing-a-line.html b/share/doc/cppinternals/Lexing-a-line.html
new file mode 100644
index 0000000..e7cf1f2
--- /dev/null
+++ b/share/doc/cppinternals/Lexing-a-line.html
@@ -0,0 +1,50 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 5.1, http://www.gnu.org/software/texinfo/ -->
+<!-- This file redirects to the location of a node or anchor -->
+<head>
+<title>The GNU C Preprocessor Internals: Lexing a line</title>
+
+<meta name="description" content="The GNU C Preprocessor Internals: Lexing a line">
+<meta name="keywords" content="The GNU C Preprocessor Internals: Lexing a line">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<style type="text/css">
+<!--
+a.summary-letter {text-decoration: none}
+blockquote.smallquotation {font-size: smaller}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+div.indentedblock {margin-left: 3.2em}
+div.lisp {margin-left: 3.2em}
+div.smalldisplay {margin-left: 3.2em}
+div.smallexample {margin-left: 3.2em}
+div.smallindentedblock {margin-left: 3.2em; font-size: smaller}
+div.smalllisp {margin-left: 3.2em}
+kbd {font-style:oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+pre.smalldisplay {font-family: inherit; font-size: smaller}
+pre.smallexample {font-size: smaller}
+pre.smallformat {font-family: inherit; font-size: smaller}
+pre.smalllisp {font-size: smaller}
+span.nocodebreak {white-space:nowrap}
+span.nolinebreak {white-space:nowrap}
+span.roman {font-family:serif; font-weight:normal}
+span.sansserif {font-family:sans-serif; font-weight:normal}
+ul.no-bullet {list-style: none}
+-->
+</style>
+
+<meta http-equiv="Refresh" content="0; url=Lexer.html#Lexing-a-line">
+
+</head>
+
+<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000">
+
+<p>The node you are looking for is at <a href="Lexer.html#Lexing-a-line">Lexing a line</a>.</p>
+</body>
diff --git a/share/doc/cppinternals/Line-Numbering.html b/share/doc/cppinternals/Line-Numbering.html
new file mode 100644
index 0000000..606a8b0
--- /dev/null
+++ b/share/doc/cppinternals/Line-Numbering.html
@@ -0,0 +1,158 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 5.1, http://www.gnu.org/software/texinfo/ -->
+<head>
+<title>The GNU C Preprocessor Internals: Line Numbering</title>
+
+<meta name="description" content="The GNU C Preprocessor Internals: Line Numbering">
+<meta name="keywords" content="The GNU C Preprocessor Internals: Line Numbering">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<link href="index.html#Top" rel="start" title="Top">
+<link href="Concept-Index.html#Concept-Index" rel="index" title="Concept Index">
+<link href="index.html#SEC_Contents" rel="contents" title="Table of Contents">
+<link href="index.html#Top" rel="up" title="Top">
+<link href="Guard-Macros.html#Guard-Macros" rel="next" title="Guard Macros">
+<link href="Token-Spacing.html#Token-Spacing" rel="previous" title="Token Spacing">
+<style type="text/css">
+<!--
+a.summary-letter {text-decoration: none}
+blockquote.smallquotation {font-size: smaller}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+div.indentedblock {margin-left: 3.2em}
+div.lisp {margin-left: 3.2em}
+div.smalldisplay {margin-left: 3.2em}
+div.smallexample {margin-left: 3.2em}
+div.smallindentedblock {margin-left: 3.2em; font-size: smaller}
+div.smalllisp {margin-left: 3.2em}
+kbd {font-style:oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+pre.smalldisplay {font-family: inherit; font-size: smaller}
+pre.smallexample {font-size: smaller}
+pre.smallformat {font-family: inherit; font-size: smaller}
+pre.smalllisp {font-size: smaller}
+span.nocodebreak {white-space:nowrap}
+span.nolinebreak {white-space:nowrap}
+span.roman {font-family:serif; font-weight:normal}
+span.sansserif {font-family:sans-serif; font-weight:normal}
+ul.no-bullet {list-style: none}
+-->
+</style>
+
+
+</head>
+
+<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000">
+<a name="Line-Numbering"></a>
+<div class="header">
+<p>
+Next: <a href="Guard-Macros.html#Guard-Macros" accesskey="n" rel="next">Guard Macros</a>, Previous: <a href="Token-Spacing.html#Token-Spacing" accesskey="p" rel="previous">Token Spacing</a>, Up: <a href="index.html#Top" accesskey="u" rel="up">Top</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Concept-Index.html#Concept-Index" title="Index" rel="index">Index</a>]</p>
+</div>
+<hr>
+<a name="Line-numbering"></a>
+<h2 class="unnumbered">Line numbering</h2>
+<a name="index-line-numbers"></a>
+
+<a name="Just-which-line-number-anyway_003f"></a>
+<h3 class="section">Just which line number anyway?</h3>
+
+<p>There are three reasonable requirements a cpplib client might have for
+the line number of a token passed to it:
+</p>
+<ul>
+<li> The source line it was lexed on.
+</li><li> The line it is output on. This can be different to the line it was
+lexed on if, for example, there are intervening escaped newlines or
+C-style comments. For example:
+
+<div class="smallexample">
+<pre class="smallexample">foo /* <span class="roman">A long
+comment</span> */ bar \
+baz
+&rArr;
+foo bar baz
+</pre></div>
+
+</li><li> If the token results from a macro expansion, the line of the macro name,
+or possibly the line of the closing parenthesis in the case of
+function-like macro expansion.
+</li></ul>
+
+<p>The <code>cpp_token</code> structure contains <code>line</code> and <code>col</code>
+members. The lexer fills these in with the line and column of the first
+character of the token. Consequently, but maybe unexpectedly, a token
+from the replacement list of a macro expansion carries the location of
+the token within the <code>#define</code> directive, because cpplib expands a
+macro by returning pointers to the tokens in its replacement list. The
+current implementation of cpplib assigns tokens created from built-in
+macros and the &lsquo;<samp>#</samp>&rsquo; and &lsquo;<samp>##</samp>&rsquo; operators the location of the most
+recently lexed token. This is because they are allocated from the
+lexer&rsquo;s token runs, and because of the way the diagnostic routines infer
+the appropriate location to report.
+</p>
+<p>The diagnostic routines in cpplib display the location of the most
+recently <em>lexed</em> token, unless they are passed a specific line and
+column to report. For diagnostics regarding tokens that arise from
+macro expansions, it might also be helpful for the user to see the
+original location in the macro definition that the token came from.
+Since that is exactly the information each token carries, such an
+enhancement could be made relatively easily in future.
+</p>
+<p>The stand-alone preprocessor faces a similar problem when determining
+the correct line to output the token on: the position attached to a
+token is fairly useless if the token came from a macro expansion. All
+tokens on a logical line should be output on its first physical line, so
+the token&rsquo;s reported location is also wrong if it is part of a physical
+line other than the first.
+</p>
+<p>To solve these issues, cpplib provides a callback that is generated
+whenever it lexes a preprocessing token that starts a new logical line
+other than a directive. It passes this token (which may be a
+<code>CPP_EOF</code> token indicating the end of the translation unit) to the
+callback routine, which can then use the line and column of this token
+to produce correct output.
+</p>
+<a name="Representation-of-line-numbers"></a>
+<h3 class="section">Representation of line numbers</h3>
+
+<p>As mentioned above, cpplib stores with each token the line number that
+it was lexed on. In fact, this number is not the number of the line in
+the source file, but instead bears more resemblance to the number of the
+line in the translation unit.
+</p>
+<p>The preprocessor maintains a monotonically increasing line count, which is
+incremented at every new line character (and also at the end of any
+buffer that does not end in a new line). Since a line number of zero is
+useful to indicate certain special states and conditions, this variable
+starts counting from one.
+</p>
+<p>This variable therefore uniquely enumerates each line in the translation
+unit. With some simple infrastructure, it is straightforward to map
+from this to the original source file and line number pair, saving space
+whenever line number information needs to be saved. The code that
+implements this mapping lies in the files <samp>line-map.cc</samp> and
+<samp>line-map.h</samp>.
+</p>
+<p>Command-line macros and assertions are implemented by pushing a buffer
+containing the right hand side of an equivalent <code>#define</code> or
+<code>#assert</code> directive. Some built-in macros are handled similarly.
+Since these are all processed before the first line of the main input
+file, it will typically have an assigned line closer to twenty than to
+one.
+</p>
+<hr>
+<div class="header">
+<p>
+Next: <a href="Guard-Macros.html#Guard-Macros" accesskey="n" rel="next">Guard Macros</a>, Previous: <a href="Token-Spacing.html#Token-Spacing" accesskey="p" rel="previous">Token Spacing</a>, Up: <a href="index.html#Top" accesskey="u" rel="up">Top</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Concept-Index.html#Concept-Index" title="Index" rel="index">Index</a>]</p>
+</div>
+
+
+
+</body>
+</html>
diff --git a/share/doc/cppinternals/Macro-Expansion.html b/share/doc/cppinternals/Macro-Expansion.html
new file mode 100644
index 0000000..3ae799b
--- /dev/null
+++ b/share/doc/cppinternals/Macro-Expansion.html
@@ -0,0 +1,241 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 5.1, http://www.gnu.org/software/texinfo/ -->
+<head>
+<title>The GNU C Preprocessor Internals: Macro Expansion</title>
+
+<meta name="description" content="The GNU C Preprocessor Internals: Macro Expansion">
+<meta name="keywords" content="The GNU C Preprocessor Internals: Macro Expansion">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<link href="index.html#Top" rel="start" title="Top">
+<link href="Concept-Index.html#Concept-Index" rel="index" title="Concept Index">
+<link href="index.html#SEC_Contents" rel="contents" title="Table of Contents">
+<link href="index.html#Top" rel="up" title="Top">
+<link href="Token-Spacing.html#Token-Spacing" rel="next" title="Token Spacing">
+<link href="Hash-Nodes.html#Hash-Nodes" rel="previous" title="Hash Nodes">
+<style type="text/css">
+<!--
+a.summary-letter {text-decoration: none}
+blockquote.smallquotation {font-size: smaller}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+div.indentedblock {margin-left: 3.2em}
+div.lisp {margin-left: 3.2em}
+div.smalldisplay {margin-left: 3.2em}
+div.smallexample {margin-left: 3.2em}
+div.smallindentedblock {margin-left: 3.2em; font-size: smaller}
+div.smalllisp {margin-left: 3.2em}
+kbd {font-style:oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+pre.smalldisplay {font-family: inherit; font-size: smaller}
+pre.smallexample {font-size: smaller}
+pre.smallformat {font-family: inherit; font-size: smaller}
+pre.smalllisp {font-size: smaller}
+span.nocodebreak {white-space:nowrap}
+span.nolinebreak {white-space:nowrap}
+span.roman {font-family:serif; font-weight:normal}
+span.sansserif {font-family:sans-serif; font-weight:normal}
+ul.no-bullet {list-style: none}
+-->
+</style>
+
+
+</head>
+
+<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000">
+<a name="Macro-Expansion"></a>
+<div class="header">
+<p>
+Next: <a href="Token-Spacing.html#Token-Spacing" accesskey="n" rel="next">Token Spacing</a>, Previous: <a href="Hash-Nodes.html#Hash-Nodes" accesskey="p" rel="previous">Hash Nodes</a>, Up: <a href="index.html#Top" accesskey="u" rel="up">Top</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Concept-Index.html#Concept-Index" title="Index" rel="index">Index</a>]</p>
+</div>
+<hr>
+<a name="Macro-Expansion-Algorithm"></a>
+<h2 class="unnumbered">Macro Expansion Algorithm</h2>
+<a name="index-macro-expansion"></a>
+
+<p>Macro expansion is a tricky operation, fraught with nasty corner cases
+and situations that render what you thought was a nifty way to
+optimize the preprocessor&rsquo;s expansion algorithm wrong in quite subtle
+ways.
+</p>
+<p>I strongly recommend you have a good grasp of how the C and C++
+standards require macros to be expanded before diving into this
+section, let alone the code! If you don&rsquo;t have a clear mental
+picture of how things like nested macro expansion, stringizing and
+token pasting are supposed to work, damage to your sanity can quickly
+result.
+</p>
+<a name="Internal-representation-of-macros"></a>
+<h3 class="section">Internal representation of macros</h3>
+<a name="index-macro-representation-_0028internal_0029"></a>
+
+<p>The preprocessor stores macro expansions in tokenized form. This
+saves repeated lexing passes during expansion, at the cost of a small
+increase in memory consumption on average. The tokens are stored
+contiguously in memory, so a pointer to the first one and a token
+count is all you need to get the replacement list of a macro.
+</p>
+<p>If the macro is a function-like macro the preprocessor also stores its
+parameters, in the form of an ordered list of pointers to the hash
+table entry of each parameter&rsquo;s identifier. Further, in the macro&rsquo;s
+stored expansion each occurrence of a parameter is replaced with a
+special token of type <code>CPP_MACRO_ARG</code>. Each such token holds the
+index of the parameter it represents in the parameter list, which
+allows rapid replacement of parameters with their arguments during
+expansion. Despite this optimization it is still necessary to store
+the original parameters to the macro, both for dumping with e.g.,
+<samp>-dD</samp>, and to warn about non-trivial macro redefinitions when
+the parameter names have changed.
+</p>
+<a name="Macro-expansion-overview"></a>
+<h3 class="section">Macro expansion overview</h3>
+<p>The preprocessor maintains a <em>context stack</em>, implemented as a
+linked list of <code>cpp_context</code> structures, which together represent
+the macro expansion state at any one time. The <code>struct
+cpp_reader</code> member variable <code>context</code> points to the current top
+of this stack. The top normally holds the unexpanded replacement list
+of the innermost macro under expansion, except when cpplib is about to
+pre-expand an argument, in which case it holds that argument&rsquo;s
+unexpanded tokens.
+</p>
+<p>When there are no macros under expansion, cpplib is in <em>base
+context</em>. All contexts other than the base context contain a
+contiguous list of tokens delimited by a starting and ending token.
+When not in base context, cpplib obtains the next token from the list
+of the top context. If there are no tokens left in the list, it pops
+that context off the stack, and subsequent ones if necessary, until an
+unexhausted context is found or it returns to base context. In base
+context, cpplib reads tokens directly from the lexer.
+</p>
+<p>If it encounters an identifier that is both a macro and enabled for
+expansion, cpplib prepares to push a new context for that macro on the
+stack by calling the routine <code>enter_macro_context</code>. When this
+routine returns, the new context will contain the unexpanded tokens of
+the replacement list of that macro. In the case of function-like
+macros, <code>enter_macro_context</code> also replaces any parameters in the
+replacement list, stored as <code>CPP_MACRO_ARG</code> tokens, with the
+appropriate macro argument. If the standard requires that the
+parameter be replaced with its expanded argument, the argument will
+have been fully macro expanded first.
+</p>
+<p><code>enter_macro_context</code> also handles special macros like
+<code>__LINE__</code>. Although these macros expand to a single token which
+cannot contain any further macros, for reasons of token spacing
+(see <a href="Token-Spacing.html#Token-Spacing">Token Spacing</a>) and simplicity of implementation, cpplib
+handles these special macros by pushing a context containing just that
+one token.
+</p>
+<p>The final thing that <code>enter_macro_context</code> does before returning
+is to mark the macro disabled for expansion (except for special macros
+like <code>__TIME__</code>). The macro is re-enabled when its context is
+later popped from the context stack, as described above. This strict
+ordering ensures that a macro is disabled whilst its expansion is
+being scanned, but that it is <em>not</em> disabled whilst any arguments
+to it are being expanded.
+</p>
+<a name="Scanning-the-replacement-list-for-macros-to-expand"></a>
+<h3 class="section">Scanning the replacement list for macros to expand</h3>
+<p>The C standard states that, after any parameters have been replaced
+with their possibly-expanded arguments, the replacement list is
+scanned for nested macros. Further, any identifiers in the
+replacement list that are not expanded during this scan are never
+again eligible for expansion in the future, if the reason they were
+not expanded is that the macro in question was disabled.
+</p>
+<p>Clearly this latter condition can only apply to tokens resulting from
+argument pre-expansion. Other tokens never have an opportunity to be
+re-tested for expansion. It is possible for identifiers that are
+function-like macros to not expand initially but to expand during a
+later scan. This occurs when the identifier is the last token of an
+argument (and therefore originally followed by a comma or a closing
+parenthesis in its macro&rsquo;s argument list), and when it replaces its
+parameter in the macro&rsquo;s replacement list, the subsequent token
+happens to be an opening parenthesis (itself possibly the first token
+of an argument).
+</p>
+<p>It is important to note that when cpplib reads the last token of a
+given context, that context still remains on the stack. Only when
+looking for the <em>next</em> token do we pop it off the stack and drop
+to a lower context. This makes backing up by one token easy, but more
+importantly ensures that the macro corresponding to the current
+context is still disabled when we are considering the last token of
+its replacement list for expansion (or indeed expanding it). As an
+example, which illustrates many of the points above, consider
+</p>
+<div class="smallexample">
+<pre class="smallexample">#define foo(x) bar x
+foo(foo) (2)
+</pre></div>
+
+<p>which fully expands to &lsquo;<samp>bar foo (2)</samp>&rsquo;. During pre-expansion
+of the argument, &lsquo;<samp>foo</samp>&rsquo; does not expand even though the macro is
+enabled, since it has no following parenthesis [pre-expansion of an
+argument only uses tokens from that argument; it cannot take tokens
+from whatever follows the macro invocation]. This still leaves the
+argument token &lsquo;<samp>foo</samp>&rsquo; eligible for future expansion. Then, when
+re-scanning after argument replacement, the token &lsquo;<samp>foo</samp>&rsquo; is
+rejected for expansion, and marked ineligible for future expansion,
+since the macro is now disabled. It is disabled because the
+replacement list &lsquo;<samp>bar foo</samp>&rsquo; of the macro is still on the context
+stack.
+</p>
+<p>If instead the algorithm looked for an opening parenthesis first and
+then tested whether the macro were disabled it would be subtly wrong.
+In the example above, the replacement list of &lsquo;<samp>foo</samp>&rsquo; would be
+popped in the process of finding the parenthesis, re-enabling
+&lsquo;<samp>foo</samp>&rsquo; and expanding it a second time.
+</p>
+<a name="Looking-for-a-function_002dlike-macro_0027s-opening-parenthesis"></a>
+<h3 class="section">Looking for a function-like macro&rsquo;s opening parenthesis</h3>
+<p>Function-like macros only expand when immediately followed by a
+parenthesis. To do this cpplib needs to temporarily disable macros
+and read the next token. Unfortunately, because of spacing issues
+(see <a href="Token-Spacing.html#Token-Spacing">Token Spacing</a>), there can be fake padding tokens in-between,
+and if the next real token is not a parenthesis cpplib needs to be
+able to back up that one token as well as retain the information in
+any intervening padding tokens.
+</p>
+<p>Backing up more than one token when macros are involved is not
+permitted by cpplib, because in general it might involve issues like
+restoring popped contexts onto the context stack, which are too hard.
+Instead, searching for the parenthesis is handled by a special
+function, <code>funlike_invocation_p</code>, which remembers padding
+information as it reads tokens. If the next real token is not an
+opening parenthesis, it backs up that one token, and then pushes an
+extra context just containing the padding information if necessary.
+</p>
+<a name="Marking-tokens-ineligible-for-future-expansion"></a>
+<h3 class="section">Marking tokens ineligible for future expansion</h3>
+<p>As discussed above, cpplib needs a way of marking tokens as
+unexpandable. Since the tokens cpplib handles are read-only once they
+have been lexed, it instead makes a copy of the token and adds the
+flag <code>NO_EXPAND</code> to the copy.
+</p>
+<p>For efficiency and to simplify memory management by avoiding having to
+remember to free these tokens, they are allocated as temporary tokens
+from the lexer&rsquo;s current token run (see <a href="Lexer.html#Lexing-a-line">Lexing a line</a>) using the
+function <code>_cpp_temp_token</code>. The tokens are then re-used once the
+current line of tokens has been read in.
+</p>
+<p>This might sound unsafe. However, token runs are not re-used at the
+end of a line if it happens to be in the middle of a macro argument
+list, and cpplib only wants to back-up more than one lexer token in
+situations where no macro expansion is involved, so the optimization
+is safe.
+</p>
+<hr>
+<div class="header">
+<p>
+Next: <a href="Token-Spacing.html#Token-Spacing" accesskey="n" rel="next">Token Spacing</a>, Previous: <a href="Hash-Nodes.html#Hash-Nodes" accesskey="p" rel="previous">Hash Nodes</a>, Up: <a href="index.html#Top" accesskey="u" rel="up">Top</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Concept-Index.html#Concept-Index" title="Index" rel="index">Index</a>]</p>
+</div>
+
+
+
+</body>
+</html>
diff --git a/share/doc/cppinternals/Token-Spacing.html b/share/doc/cppinternals/Token-Spacing.html
new file mode 100644
index 0000000..8da69cb
--- /dev/null
+++ b/share/doc/cppinternals/Token-Spacing.html
@@ -0,0 +1,203 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 5.1, http://www.gnu.org/software/texinfo/ -->
+<head>
+<title>The GNU C Preprocessor Internals: Token Spacing</title>
+
+<meta name="description" content="The GNU C Preprocessor Internals: Token Spacing">
+<meta name="keywords" content="The GNU C Preprocessor Internals: Token Spacing">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<link href="index.html#Top" rel="start" title="Top">
+<link href="Concept-Index.html#Concept-Index" rel="index" title="Concept Index">
+<link href="index.html#SEC_Contents" rel="contents" title="Table of Contents">
+<link href="index.html#Top" rel="up" title="Top">
+<link href="Line-Numbering.html#Line-Numbering" rel="next" title="Line Numbering">
+<link href="Macro-Expansion.html#Macro-Expansion" rel="previous" title="Macro Expansion">
+<style type="text/css">
+<!--
+a.summary-letter {text-decoration: none}
+blockquote.smallquotation {font-size: smaller}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+div.indentedblock {margin-left: 3.2em}
+div.lisp {margin-left: 3.2em}
+div.smalldisplay {margin-left: 3.2em}
+div.smallexample {margin-left: 3.2em}
+div.smallindentedblock {margin-left: 3.2em; font-size: smaller}
+div.smalllisp {margin-left: 3.2em}
+kbd {font-style:oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+pre.smalldisplay {font-family: inherit; font-size: smaller}
+pre.smallexample {font-size: smaller}
+pre.smallformat {font-family: inherit; font-size: smaller}
+pre.smalllisp {font-size: smaller}
+span.nocodebreak {white-space:nowrap}
+span.nolinebreak {white-space:nowrap}
+span.roman {font-family:serif; font-weight:normal}
+span.sansserif {font-family:sans-serif; font-weight:normal}
+ul.no-bullet {list-style: none}
+-->
+</style>
+
+
+</head>
+
+<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000">
+<a name="Token-Spacing"></a>
+<div class="header">
+<p>
+Next: <a href="Line-Numbering.html#Line-Numbering" accesskey="n" rel="next">Line Numbering</a>, Previous: <a href="Macro-Expansion.html#Macro-Expansion" accesskey="p" rel="previous">Macro Expansion</a>, Up: <a href="index.html#Top" accesskey="u" rel="up">Top</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Concept-Index.html#Concept-Index" title="Index" rel="index">Index</a>]</p>
+</div>
+<hr>
+<a name="Token-Spacing-1"></a>
+<h2 class="unnumbered">Token Spacing</h2>
+<a name="index-paste-avoidance"></a>
+<a name="index-spacing"></a>
+<a name="index-token-spacing"></a>
+
+<p>First, consider an issue that only concerns the stand-alone
+preprocessor: there needs to be a guarantee that re-reading its preprocessed
+output results in an identical token stream. Without taking special
+measures, this might not be the case because of macro substitution.
+For example:
+</p>
+<div class="smallexample">
+<pre class="smallexample">#define PLUS +
+#define EMPTY
+#define f(x) =x=
++PLUS -EMPTY- PLUS+ f(=)
+ &rarr; + + - - + + = = =
+<em>not</em>
+ &rarr; ++ -- ++ ===
+</pre></div>
+
+<p>One solution would be to simply insert a space between all adjacent
+tokens. However, we would like to keep space insertion to a minimum,
+both for aesthetic reasons and because it causes problems for people who
+still try to abuse the preprocessor for things like Fortran source and
+Makefiles.
+</p>
+<p>For now, just notice that when tokens are added (or removed, as shown by
+the <code>EMPTY</code> example) from the original lexed token stream, we need
+to check for accidental token pasting. We call this <em>paste
+avoidance</em>. Token addition and removal can only occur because of macro
+expansion, but accidental pasting can occur in many places: both before
+and after each macro replacement, each argument replacement, and
+additionally each token created by the &lsquo;<samp>#</samp>&rsquo; and &lsquo;<samp>##</samp>&rsquo; operators.
+</p>
+<p>Look at how the preprocessor gets whitespace output correct
+normally. The <code>cpp_token</code> structure contains a flags byte, and one
+of those flags is <code>PREV_WHITE</code>. This is flagged by the lexer, and
+indicates that the token was preceded by whitespace of some form other
+than a new line. The stand-alone preprocessor can use this flag to
+decide whether to insert a space between tokens in the output.
+</p>
+<p>Now consider the result of the following macro expansion:
+</p>
+<div class="smallexample">
+<pre class="smallexample">#define add(x, y, z) x + y +z;
+sum = add (1,2, 3);
+ &rarr; sum = 1 + 2 +3;
+</pre></div>
+
+<p>The interesting thing here is that the tokens &lsquo;<samp>1</samp>&rsquo; and &lsquo;<samp>2</samp>&rsquo; are
+output with a preceding space, and &lsquo;<samp>3</samp>&rsquo; is output without a
+preceding space, but when lexed none of these tokens had that property.
+Careful consideration reveals that &lsquo;<samp>1</samp>&rsquo; gets its preceding
+whitespace from the space preceding &lsquo;<samp>add</samp>&rsquo; in the macro invocation,
+<em>not</em> the replacement list. &lsquo;<samp>2</samp>&rsquo; gets its whitespace from the
+space preceding the parameter &lsquo;<samp>y</samp>&rsquo; in the macro replacement list,
+and &lsquo;<samp>3</samp>&rsquo; has no preceding space because parameter &lsquo;<samp>z</samp>&rsquo; has none
+in the replacement list.
+</p>
+<p>Once lexed, tokens are effectively fixed and cannot be altered, since
+pointers to them might be held in many places, in particular by
+in-progress macro expansions. So instead of modifying the two tokens
+above, the preprocessor inserts a special token, which I call a
+<em>padding token</em>, into the token stream to indicate that spacing of
+the subsequent token is special. The preprocessor inserts padding
+tokens in front of every macro expansion and expanded macro argument.
+These point to a <em>source token</em> from which the subsequent real token
+should inherit its spacing. In the above example, the source tokens are
+&lsquo;<samp>add</samp>&rsquo; in the macro invocation, and &lsquo;<samp>y</samp>&rsquo; and &lsquo;<samp>z</samp>&rsquo; in the
+macro replacement list, respectively.
+</p>
+<p>It is quite easy to get multiple padding tokens in a row, for example if
+a macro&rsquo;s first replacement token expands straight into another macro.
+</p>
+<div class="smallexample">
+<pre class="smallexample">#define foo bar
+#define bar baz
+[foo]
+ &rarr; [baz]
+</pre></div>
+
+<p>Here, two padding tokens are generated with sources the &lsquo;<samp>foo</samp>&rsquo; token
+between the brackets, and the &lsquo;<samp>bar</samp>&rsquo; token from foo&rsquo;s replacement
+list, respectively. Clearly the first padding token is the one to
+use, so the output code should contain a rule that the first
+padding token in a sequence is the one that matters.
+</p>
+<p>But what if a macro expansion is left? Adjusting the above
+example slightly:
+</p>
+<div class="smallexample">
+<pre class="smallexample">#define foo bar
+#define bar EMPTY baz
+#define EMPTY
+[foo] EMPTY;
+ &rarr; [ baz] ;
+</pre></div>
+
+<p>As shown, now there should be a space before &lsquo;<samp>baz</samp>&rsquo; and the
+semicolon in the output.
+</p>
+<p>The rules we decided above fail for &lsquo;<samp>baz</samp>&rsquo;: we generate three
+padding tokens, one per macro invocation, before the token &lsquo;<samp>baz</samp>&rsquo;.
+We would then have it take its spacing from the first of these, which
+carries source token &lsquo;<samp>foo</samp>&rsquo; with no leading space.
+</p>
+<p>It is vital that cpplib get spacing correct in these examples since any
+of these macro expansions could be stringized, where spacing matters.
+</p>
+<p>So, this demonstrates that not just entering macro and argument
+expansions, but leaving them requires special handling too. I made
+cpplib insert a padding token with a <code>NULL</code> source token when
+leaving macro expansions, as well as after each replaced argument in a
+macro&rsquo;s replacement list. It also inserts appropriate padding tokens on
+either side of tokens created by the &lsquo;<samp>#</samp>&rsquo; and &lsquo;<samp>##</samp>&rsquo; operators.
+I expanded the rule so that, if we see a padding token with a
+<code>NULL</code> source token, <em>and</em> that source token has no leading
+space, then we behave as if we have seen no padding tokens at all. A
+quick check shows this rule will then get the above example correct as
+well.
+</p>
+<p>Now a relationship with paste avoidance is apparent: we have to be
+careful about paste avoidance in exactly the same locations we have
+padding tokens in order to get white space correct. This makes
+implementation of paste avoidance easy: wherever the stand-alone
+preprocessor is fixing up spacing because of padding tokens, and it
+turns out that no space is needed, it has to take the extra step to
+check that a space is not needed after all to avoid an accidental paste.
+The function <code>cpp_avoid_paste</code> advises whether a space is required
+between two consecutive tokens. To avoid excessive spacing, it tries
+hard to only require a space if one is likely to be necessary, but for
+reasons of efficiency it is slightly conservative and might recommend a
+space where one is not strictly needed.
+</p>
+<hr>
+<div class="header">
+<p>
+Next: <a href="Line-Numbering.html#Line-Numbering" accesskey="n" rel="next">Line Numbering</a>, Previous: <a href="Macro-Expansion.html#Macro-Expansion" accesskey="p" rel="previous">Macro Expansion</a>, Up: <a href="index.html#Top" accesskey="u" rel="up">Top</a> &nbsp; [<a href="index.html#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Concept-Index.html#Concept-Index" title="Index" rel="index">Index</a>]</p>
+</div>
+
+
+
+</body>
+</html>
diff --git a/share/doc/cppinternals/index.html b/share/doc/cppinternals/index.html
new file mode 100644
index 0000000..db1fad9
--- /dev/null
+++ b/share/doc/cppinternals/index.html
@@ -0,0 +1,160 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<!-- Created by GNU Texinfo 5.1, http://www.gnu.org/software/texinfo/ -->
+<head>
+<title>The GNU C Preprocessor Internals: Top</title>
+
+<meta name="description" content="The GNU C Preprocessor Internals: Top">
+<meta name="keywords" content="The GNU C Preprocessor Internals: Top">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="makeinfo">
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<link href="#Top" rel="start" title="Top">
+<link href="Concept-Index.html#Concept-Index" rel="index" title="Concept Index">
+<link href="#SEC_Contents" rel="contents" title="Table of Contents">
+<link href="../dir/index.html" rel="up" title="(dir)">
+<link href="Conventions.html#Conventions" rel="next" title="Conventions">
+<style type="text/css">
+<!--
+a.summary-letter {text-decoration: none}
+blockquote.smallquotation {font-size: smaller}
+div.display {margin-left: 3.2em}
+div.example {margin-left: 3.2em}
+div.indentedblock {margin-left: 3.2em}
+div.lisp {margin-left: 3.2em}
+div.smalldisplay {margin-left: 3.2em}
+div.smallexample {margin-left: 3.2em}
+div.smallindentedblock {margin-left: 3.2em; font-size: smaller}
+div.smalllisp {margin-left: 3.2em}
+kbd {font-style:oblique}
+pre.display {font-family: inherit}
+pre.format {font-family: inherit}
+pre.menu-comment {font-family: serif}
+pre.menu-preformatted {font-family: serif}
+pre.smalldisplay {font-family: inherit; font-size: smaller}
+pre.smallexample {font-size: smaller}
+pre.smallformat {font-family: inherit; font-size: smaller}
+pre.smalllisp {font-size: smaller}
+span.nocodebreak {white-space:nowrap}
+span.nolinebreak {white-space:nowrap}
+span.roman {font-family:serif; font-weight:normal}
+span.sansserif {font-family:sans-serif; font-weight:normal}
+ul.no-bullet {list-style: none}
+-->
+</style>
+
+
+</head>
+
+<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000">
+<h1 class="settitle" align="center">The GNU C Preprocessor Internals</h1>
+
+
+
+
+
+
+
+
+
+
+
+
+
+<a name="SEC_Contents"></a>
+<h2 class="contents-heading">Table of Contents</h2>
+
+<div class="contents">
+
+<ul class="no-bullet">
+ <li><a name="toc-Cpplib_002d_002d_002dthe-GNU-C-Preprocessor" href="#Cpplib_002d_002d_002dthe-GNU-C-Preprocessor">1 Cpplib&mdash;the GNU C Preprocessor</a></li>
+ <li><a name="toc-Conventions-1" href="Conventions.html#Conventions">Conventions</a></li>
+ <li><a name="toc-The-Lexer" href="Lexer.html#Lexer">The Lexer</a>
+ <ul class="no-bullet">
+ <li><a name="toc-Overview" href="Lexer.html#Overview">Overview</a></li>
+ <li><a name="toc-Lexing-a-token" href="Lexer.html#Lexing-a-token">Lexing a token</a></li>
+ <li><a name="toc-Lexing-a-line-1" href="Lexer.html#Lexing-a-line-1">Lexing a line</a></li>
+ </ul></li>
+ <li><a name="toc-Hash-Nodes-1" href="Hash-Nodes.html#Hash-Nodes">Hash Nodes</a></li>
+ <li><a name="toc-Macro-Expansion-Algorithm" href="Macro-Expansion.html#Macro-Expansion">Macro Expansion Algorithm</a>
+ <ul class="no-bullet">
+ <li><a name="toc-Internal-representation-of-macros" href="Macro-Expansion.html#Internal-representation-of-macros">Internal representation of macros</a></li>
+ <li><a name="toc-Macro-expansion-overview" href="Macro-Expansion.html#Macro-expansion-overview">Macro expansion overview</a></li>
+ <li><a name="toc-Scanning-the-replacement-list-for-macros-to-expand" href="Macro-Expansion.html#Scanning-the-replacement-list-for-macros-to-expand">Scanning the replacement list for macros to expand</a></li>
+ <li><a name="toc-Looking-for-a-function_002dlike-macro_0027s-opening-parenthesis" href="Macro-Expansion.html#Looking-for-a-function_002dlike-macro_0027s-opening-parenthesis">Looking for a function-like macro&rsquo;s opening parenthesis</a></li>
+ <li><a name="toc-Marking-tokens-ineligible-for-future-expansion" href="Macro-Expansion.html#Marking-tokens-ineligible-for-future-expansion">Marking tokens ineligible for future expansion</a></li>
+ </ul></li>
+ <li><a name="toc-Token-Spacing-1" href="Token-Spacing.html#Token-Spacing">Token Spacing</a></li>
+ <li><a name="toc-Line-numbering" href="Line-Numbering.html#Line-Numbering">Line numbering</a>
+ <ul class="no-bullet">
+ <li><a name="toc-Just-which-line-number-anyway_003f" href="Line-Numbering.html#Just-which-line-number-anyway_003f">Just which line number anyway?</a></li>
+ <li><a name="toc-Representation-of-line-numbers" href="Line-Numbering.html#Representation-of-line-numbers">Representation of line numbers</a></li>
+ </ul></li>
+ <li><a name="toc-The-Multiple_002dInclude-Optimization" href="Guard-Macros.html#Guard-Macros">The Multiple-Include Optimization</a></li>
+ <li><a name="toc-File-Handling" href="Files.html#Files">File Handling</a></li>
+ <li><a name="toc-Concept-Index-1" href="Concept-Index.html#Concept-Index">Concept Index</a></li>
+</ul>
+</div>
+
+
+<a name="Top"></a>
+<div class="header">
+<p>
+Next: <a href="Conventions.html#Conventions" accesskey="n" rel="next">Conventions</a>, Up: <a href="../dir/index.html" accesskey="u" rel="up">(dir)</a> &nbsp; [<a href="#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Concept-Index.html#Concept-Index" title="Index" rel="index">Index</a>]</p>
+</div>
+<hr>
+<a name="SEC_Top"></a>
+<a name="Cpplib_002d_002d_002dthe-GNU-C-Preprocessor"></a>
+<h2 class="chapter">1 Cpplib&mdash;the GNU C Preprocessor</h2>
+
+<p>The GNU C preprocessor is
+implemented as a library, <em>cpplib</em>, so it can be easily shared between
+a stand-alone preprocessor and a preprocessor integrated with the C,
+C++ and Objective-C front ends. It is also available for use by other
+programs, though this is not recommended as its exposed interface has
+not yet reached a point of reasonable stability.
+</p>
+<p>The library has been written to be re-entrant, so that it can be used
+to preprocess many files simultaneously if necessary. It has also been
+written with the preprocessing token as the fundamental unit; the
+preprocessor in previous versions of GCC would operate on text strings
+as the fundamental unit.
+</p>
+<p>This brief manual documents the internals of cpplib, and explains some
+of the tricky issues. It is intended that, along with the comments in
+the source code, a reasonably competent C programmer should be able to
+figure out what the code is doing, and why things have been implemented
+the way they have.
+</p>
+<table class="menu" border="0" cellspacing="0">
+<tr><td align="left" valign="top">&bull; <a href="Conventions.html#Conventions" accesskey="1">Conventions</a>:</td><td>&nbsp;&nbsp;</td><td align="left" valign="top">Conventions used in the code.
+</td></tr>
+<tr><td align="left" valign="top">&bull; <a href="Lexer.html#Lexer" accesskey="2">Lexer</a>:</td><td>&nbsp;&nbsp;</td><td align="left" valign="top">The combined C, C++ and Objective-C Lexer.
+</td></tr>
+<tr><td align="left" valign="top">&bull; <a href="Hash-Nodes.html#Hash-Nodes" accesskey="3">Hash Nodes</a>:</td><td>&nbsp;&nbsp;</td><td align="left" valign="top">All identifiers are entered into a hash table.
+</td></tr>
+<tr><td align="left" valign="top">&bull; <a href="Macro-Expansion.html#Macro-Expansion" accesskey="4">Macro Expansion</a>:</td><td>&nbsp;&nbsp;</td><td align="left" valign="top">Macro expansion algorithm.
+</td></tr>
+<tr><td align="left" valign="top">&bull; <a href="Token-Spacing.html#Token-Spacing" accesskey="5">Token Spacing</a>:</td><td>&nbsp;&nbsp;</td><td align="left" valign="top">Spacing and paste avoidance issues.
+</td></tr>
+<tr><td align="left" valign="top">&bull; <a href="Line-Numbering.html#Line-Numbering" accesskey="6">Line Numbering</a>:</td><td>&nbsp;&nbsp;</td><td align="left" valign="top">Tracking location within files.
+</td></tr>
+<tr><td align="left" valign="top">&bull; <a href="Guard-Macros.html#Guard-Macros" accesskey="7">Guard Macros</a>:</td><td>&nbsp;&nbsp;</td><td align="left" valign="top">Optimizing header files with guard macros.
+</td></tr>
+<tr><td align="left" valign="top">&bull; <a href="Files.html#Files" accesskey="8">Files</a>:</td><td>&nbsp;&nbsp;</td><td align="left" valign="top">File handling.
+</td></tr>
+<tr><td align="left" valign="top">&bull; <a href="Concept-Index.html#Concept-Index" accesskey="9">Concept Index</a>:</td><td>&nbsp;&nbsp;</td><td align="left" valign="top">Index.
+</td></tr>
+</table>
+
+<hr>
+<div class="header">
+<p>
+Next: <a href="Conventions.html#Conventions" accesskey="n" rel="next">Conventions</a>, Up: <a href="../dir/index.html" accesskey="u" rel="up">(dir)</a> &nbsp; [<a href="#SEC_Contents" title="Table of contents" rel="contents">Contents</a>][<a href="Concept-Index.html#Concept-Index" title="Index" rel="index">Index</a>]</p>
+</div>
+
+
+
+</body>
+</html>