Home | History | Annotate | Download | only in doc
      1 <!DOCTYPE refentry [
      2   <!-- Fill in your name for FIRSTNAME and SURNAME. -->
      3   <!ENTITY dhfirstname "<firstname>Scott</firstname>">
      4   <!ENTITY dhsurname   "<surname>Bronson</surname>">
      5   <!-- Please adjust the date whenever revising the manpage. -->
      6   <!ENTITY dhdate      "<date>March 11, 2016</date>">
      7   <!-- SECTION should be 1-8, maybe with a subsection; other parameters
      8        are allowed: see man(7), man(1). -->
      9   <!ENTITY dhsection   "<manvolnum>1</manvolnum>">
     10   <!ENTITY dhemail     "<email>bronson@rinspin.com</email>">
     11   <!ENTITY dhusername  "Scott Bronson">
     12   <!ENTITY dhucpackage "<refentrytitle>XMLWF</refentrytitle>">
     13   <!ENTITY dhpackage   "xmlwf">
     14 
     15   <!ENTITY debian      "<productname>Debian GNU/Linux</productname>">
     16   <!ENTITY gnu         "<acronym>GNU</acronym>">
     17 ]>
     18 
     19 <refentry>
     20   <refentryinfo>
     21     <address>
     22       &dhemail;
     23     </address>
     24     <author>
     25       &dhfirstname;
     26       &dhsurname;
     27     </author>
     28     <copyright>
     29       <year>2001</year>
     30       <holder>&dhusername;</holder>
     31     </copyright>
     32     &dhdate;
     33   </refentryinfo>
     34   <refmeta>
     35     &dhucpackage;
     36 
     37     &dhsection;
     38   </refmeta>
     39   <refnamediv>
     40     <refname>&dhpackage;</refname>
     41 
     42     <refpurpose>Determines if an XML document is well-formed</refpurpose>
     43   </refnamediv>
     44   <refsynopsisdiv>
     45     <cmdsynopsis>
     46       <command>&dhpackage;</command>
     47 	  <arg><option>-s</option></arg>
     48 	  <arg><option>-n</option></arg>
     49 	  <arg><option>-p</option></arg>
     50 	  <arg><option>-x</option></arg>
     51 
     52 	  <arg><option>-e <replaceable>encoding</replaceable></option></arg>
     53 	  <arg><option>-w</option></arg>
     54 
     55 	  <arg><option>-d <replaceable>output-dir</replaceable></option></arg>
     56 	  <arg><option>-c</option></arg>
     57 	  <arg><option>-m</option></arg>
     58 
     59 	  <arg><option>-r</option></arg>
     60 	  <arg><option>-t</option></arg>
     61 
     62 	  <arg><option>-v</option></arg>
     63 
     64 	  <arg>file ...</arg>
     65     </cmdsynopsis>
     66   </refsynopsisdiv>
     67  
     68   <refsect1>
     69     <title>DESCRIPTION</title>
     70 
     71     <para>
     72 	<command>&dhpackage;</command> uses the Expat library to
     73 	determine if an XML document is well-formed.  It is
     74 	non-validating.
     75 	</para>
     76 
     77 	<para>
     78 	If you do not specify any files on the command-line, and you
     79 	have a recent version of <command>&dhpackage;</command>, the
     80 	input file will be read from standard input.
     81 	</para>
     82 
     83   </refsect1>
     84 
     85   <refsect1>
     86     <title>WELL-FORMED DOCUMENTS</title>
     87 
     88 	<para>
     89 	  A well-formed document must adhere to the
     90 	  following rules:
     91 	</para>
     92 
     93 	<itemizedlist>
     94       <listitem><para>
     95 	    The file begins with an XML declaration.  For instance,
     96 		<literal>&lt;?xml version="1.0" standalone="yes"?&gt;</literal>.
     97 		<emphasis>NOTE:</emphasis>
     98 		<command>&dhpackage;</command> does not currently
     99 		check for a valid XML declaration.
    100       </para></listitem>
    101       <listitem><para>
    102 		Every start tag is either empty (&lt;tag/&gt;)
    103 		or has a corresponding end tag.
    104       </para></listitem>
    105       <listitem><para>
    106 	    There is exactly one root element.  This element must contain
    107 		all other elements in the document.  Only comments, white
    108 		space, and processing instructions may come after the close
    109 		of the root element.
    110       </para></listitem>
    111       <listitem><para>
    112 		All elements nest properly.
    113       </para></listitem>
    114       <listitem><para>
    115 		All attribute values are enclosed in quotes (either single
    116 		or double).
    117       </para></listitem>
    118     </itemizedlist>
    119 
    120 	<para>
    121 	  If the document has a DTD, and it strictly complies with that
    122 	  DTD, then the document is also considered <emphasis>valid</emphasis>.
    123 	  <command>&dhpackage;</command> is a non-validating parser --
    124 	  it does not check the DTD.  However, it does support
    125 	  external entities (see the <option>-x</option> option).
    126 	</para>
    127   </refsect1>
    128 
    129   <refsect1>
    130     <title>OPTIONS</title>
    131 
    132 <para>
    133 When an option includes an argument, you may specify the argument either
    134 separately ("<option>-d</option> output") or concatenated with the
    135 option ("<option>-d</option>output").  <command>&dhpackage;</command>
    136 supports both.
    137 </para>
    138 
    139     <variablelist>
    140 
    141       <varlistentry>
    142         <term><option>-c</option></term>
    143         <listitem>
    144 		<para>
    145   If the input file is well-formed and <command>&dhpackage;</command>
    146   doesn't encounter any errors, the input file is simply copied to
    147   the output directory unchanged.
    148   This implies no namespaces (turns off <option>-n</option>) and
    149   requires <option>-d</option> to specify an output directory.
    150   		</para>
    151         </listitem>
    152       </varlistentry>
    153 
    154       <varlistentry>
    155         <term><option>-d <replaceable>output-dir</replaceable></option></term>
    156         <listitem>
    157 		<para>
    158   Specifies a directory to contain transformed
    159   representations of the input files.
    160   By default, <option>-d</option> outputs a canonical representation
    161   (described below).
    162   You can select different output formats using <option>-c</option>
    163   and <option>-m</option>.
    164 	  </para>
    165 	  <para>
    166   The output filenames will
    167   be exactly the same as the input filenames or "STDIN" if the input is
    168   coming from standard input.  Therefore, you must be careful that the
    169   output file does not go into the same directory as the input
    170   file.  Otherwise, <command>&dhpackage;</command> will delete the
    171   input file before it generates the output file (just like running
    172   <literal>cat &lt; file &gt; file</literal> in most shells).
    173 	  </para>
    174 	  <para> 
    175   Two structurally equivalent XML documents have a byte-for-byte
    176   identical canonical XML representation.
    177   Note that ignorable white space is considered significant and
    178   is treated equivalently to data.
    179   More on canonical XML can be found at
    180   http://www.jclark.com/xml/canonxml.html .
    181 	  </para>
    182         </listitem>
    183       </varlistentry>
    184 
    185       <varlistentry>
    186         <term><option>-e <replaceable>encoding</replaceable></option></term>
    187         <listitem>
    188 		<para>
    189    Specifies the character encoding for the document, overriding
    190    any document encoding declaration.  <command>&dhpackage;</command>
    191    supports four built-in encodings:
    192    	<literal>US-ASCII</literal>,
    193 	<literal>UTF-8</literal>,
    194 	<literal>UTF-16</literal>, and
    195 	<literal>ISO-8859-1</literal>.
    196    Also see the <option>-w</option> option.
    197 	   </para>
    198         </listitem>
    199       </varlistentry>
    200 
    201       <varlistentry>
    202         <term><option>-m</option></term>
    203         <listitem>
    204 		<para>
    205   Outputs some strange sort of XML file that completely
    206   describes the input file, including character positions.
    207   Requires <option>-d</option> to specify an output directory.
    208 	   </para>
    209         </listitem>
    210       </varlistentry>
    211 
    212       <varlistentry>
    213         <term><option>-n</option></term>
    214         <listitem>
    215 		<para>
    216   Turns on namespace processing.  <!-- TODO: describe namespaces -->
    217   <option>-c</option> disables namespaces.
    218 	   </para>
    219         </listitem>
    220       </varlistentry>
    221 
    222       <varlistentry>
    223         <term><option>-p</option></term>
    224         <listitem>
    225 		<para>
    226     Tells <command>&dhpackage;</command> to process external DTDs and parameter
    227     entities.
    228 	 </para>
    229 	 <para>
    230    Normally <command>&dhpackage;</command> never parses parameter
    231    entities.  <option>-p</option> tells it to always parse them.
    232    <option>-p</option> implies <option>-x</option>.
    233 	   </para>
    234         </listitem>
    235       </varlistentry>
    236 
    237       <varlistentry>
    238         <term><option>-r</option></term>
    239         <listitem>
    240 		<para>
    241    Normally <command>&dhpackage;</command> memory-maps the XML file
    242    before parsing; this can result in faster parsing on many
    243    platforms.
    244    <option>-r</option> turns off memory-mapping and uses normal file
    245    IO calls instead.
    246    Of course, memory-mapping is automatically turned off
    247    when reading from standard input.
    248 	   </para>
    249 		<para>
    250    Use of memory-mapping can cause some platforms to report
    251    substantially higher memory usage for
    252    <command>&dhpackage;</command>, but this appears to be a matter of
    253    the operating system reporting memory in a strange way; there is
    254    not a leak in <command>&dhpackage;</command>.
    255            </para>
    256         </listitem>
    257       </varlistentry>
    258 
    259       <varlistentry>
    260         <term><option>-s</option></term>
    261         <listitem>
    262 		<para>
    263   Prints an error if the document is not standalone. 
    264   A document is standalone if it has no external subset and no
    265   references to parameter entities.
    266 	   </para>
    267         </listitem>
    268       </varlistentry>
    269 
    270       <varlistentry>
    271         <term><option>-t</option></term>
    272         <listitem>
    273 		<para>
    274   Turns on timings.  This tells Expat to parse the entire file,
    275   but not perform any processing.
    276   This gives a fairly accurate idea of the raw speed of Expat itself
    277   without client overhead.
    278   <option>-t</option> turns off most of the output options
    279   (<option>-d</option>, <option>-m</option>, <option>-c</option>, ...).
    280 	   </para>
    281         </listitem>
    282       </varlistentry>
    283 
    284       <varlistentry>
    285         <term><option>-v</option></term>
    286         <listitem>
    287 		<para>
    288   Prints the version of the Expat library being used, including some
    289   information on the compile-time configuration of the library, and
    290   then exits.
    291 	   </para>
    292         </listitem>
    293       </varlistentry>
    294 
    295       <varlistentry>
    296         <term><option>-w</option></term>
    297         <listitem>
    298 		<para>
    299   Enables support for Windows code pages.
    300   Normally, <command>&dhpackage;</command> will throw an error if it
    301   runs across an encoding that it is not equipped to handle itself.  With
    302   <option>-w</option>, <command>&dhpackage;</command> will try to use a Windows code
    303   page.  See also <option>-e</option>.
    304 	   </para>
    305         </listitem>
    306       </varlistentry>
    307 
    308       <varlistentry>
    309         <term><option>-x</option></term>
    310         <listitem>
    311 		<para>
    312   Turns on parsing external entities.
    313   </para>
    314 <para>
    315   Non-validating parsers are not required to resolve external
    316   entities, or even expand entities at all.
    317   Expat always expands internal entities (?),
    318   but external entity parsing must be enabled explicitly.
    319   </para>
    320   <para>
    321   External entities are simply entities that obtain their
    322   data from outside the XML file currently being parsed.
    323   </para>
    324   <para>
    325   This is an example of an internal entity:
    326 <literallayout>
    327 &lt;!ENTITY vers '1.0.2'&gt;
    328 </literallayout>
    329   </para>
    330   <para>
    331   And here are some examples of external entities:
    332 
    333 <literallayout>
    334 &lt;!ENTITY header SYSTEM "header-&amp;vers;.xml"&gt;  (parsed)
    335 &lt;!ENTITY logo SYSTEM "logo.png" NDATA PNG&gt;   (unparsed)
    336 </literallayout>
    337 
    338 	   </para>
    339         </listitem>
    340       </varlistentry>
    341 
    342       <varlistentry>
    343         <term><option>--</option></term>
    344         <listitem>
    345 		<para>
    346     (Two hyphens.)
    347     Terminates the list of options.  This is only needed if a filename
    348     starts with a hyphen.  For example:
    349 	   </para>
    350 <literallayout>
    351 &dhpackage; -- -myfile.xml
    352 </literallayout>
    353 		<para>
    354     will run <command>&dhpackage;</command> on the file
    355     <filename>-myfile.xml</filename>.
    356 	   </para>
    357         </listitem>
    358       </varlistentry>
    359     </variablelist>
    360 
    361 	<para>
    362     Older versions of <command>&dhpackage;</command> do not support
    363     reading from standard input.
    364 	</para>
    365   </refsect1>
    366 
    367   <refsect1>
    368   <title>OUTPUT</title>
    369     <para>
    370 	If an input file is not well-formed,
    371 	<command>&dhpackage;</command> prints a single line describing
    372 	the problem to standard output.  If a file is well-formed,
    373 	<command>&dhpackage;</command> outputs nothing.
    374 	Note that the result code is <emphasis>not</emphasis> set.
    375 	</para>
    376   </refsect1>
    377   
    378   <refsect1>
    379     <title>BUGS</title>
    380 	<para>
    381 	<command>&dhpackage;</command> returns a result code of 0 (no error),
    382 	even if the file is not well-formed.  There is no good way for
    383 	a program to use <command>&dhpackage;</command> to quickly
    384 	check a file -- it must parse <command>&dhpackage;</command>'s
    385 	standard output.
    386 	</para>
    387 	<para>
    388 	The errors should go to standard error, not standard output.
    389 	</para>
    390 	<para>
    391 	There should be a way to get <option>-d</option> to send its
    392 	output to standard output rather than forcing the user to send
    393 	it to a file.
    394 	</para>
    395 	<para>
    396 	I have no idea why anyone would want to use the
    397 	<option>-d</option>, <option>-c</option>, and
    398 	<option>-m</option> options.  If someone could explain it to
    399 	me, I'd like to add this information to this manpage.
    400 	</para>
    401   </refsect1>
    402 
    403   <refsect1>
    404     <title>ALTERNATIVES</title>
    405 	<para>
    406 	  Here are some XML validators on the web:
    407 
    408 <literallayout>
    409 http://www.hcrc.ed.ac.uk/~richard/xml-check.html
    410 http://www.stg.brown.edu/service/xmlvalid/
    411 http://www.scripting.com/frontier5/xml/code/xmlValidator.html
    412 http://www.xml.com/pub/a/tools/ruwf/check.html
    413 </literallayout>
    414 
    415 		 </para>
    416   </refsect1>
    417 
    418   <refsect1>
    419     <title>SEE ALSO</title>
    420 	<para>
    421 
    422 <literallayout>
    423 The Expat home page:        http://www.libexpat.org/
    424 The W3 XML specification:   http://www.w3.org/TR/REC-xml
    425 </literallayout>
    426 
    427 	</para>
    428   </refsect1>
    429 
    430   <refsect1>
    431     <title>AUTHOR</title>
    432     <para>
    433 	  This manual page was written by &dhusername; &dhemail; for
    434       the &debian; system (but may be used by others).  Permission is
    435       granted to copy, distribute and/or modify this document under
    436       the terms of the <acronym>GNU</acronym> Free Documentation
    437       License, Version 1.1.
    438 	</para>
    439   </refsect1>
    440 </refentry>
    441