Home | History | Annotate | Download | only in doc
      1 <!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN" [
      2 
      3 <!-- Process this file with docbook-to-man to generate an nroff manual
      4      page: `docbook-to-man manpage.sgml > manpage.1'.  You may view
      5      the manual page with: `docbook-to-man manpage.sgml | nroff -man |
      6      less'.  A typical entry in a Makefile or Makefile.am is:
      7 
      8 manpage.1: manpage.sgml
      9 	docbook-to-man $< > $@
     10   -->
     11 
     12   <!-- Fill in your name for FIRSTNAME and SURNAME. -->
     13   <!ENTITY dhfirstname "<firstname>Scott</firstname>">
     14   <!ENTITY dhsurname   "<surname>Bronson</surname>">
     15   <!-- Please adjust the date whenever revising the manpage. -->
     16   <!ENTITY dhdate      "<date>December  5, 2001</date>">
     17   <!-- SECTION should be 1-8, maybe w/ subsection; other parameters are
     18        allowed: see man(7), man(1). -->
     19   <!ENTITY dhsection   "<manvolnum>1</manvolnum>">
     20   <!ENTITY dhemail     "<email>bronson@rinspin.com</email>">
     21   <!ENTITY dhusername  "Scott Bronson">
     22   <!ENTITY dhucpackage "<refentrytitle>XMLWF</refentrytitle>">
     23   <!ENTITY dhpackage   "xmlwf">
     24 
     25   <!ENTITY debian      "<productname>Debian GNU/Linux</productname>">
     26   <!ENTITY gnu         "<acronym>GNU</acronym>">
     27 ]>
     28 
     29 <refentry>
     30   <refentryinfo>
     31     <address>
     32       &dhemail;
     33     </address>
     34     <author>
     35       &dhfirstname;
     36       &dhsurname;
     37     </author>
     38     <copyright>
     39       <year>2001</year>
     40       <holder>&dhusername;</holder>
     41     </copyright>
     42     &dhdate;
     43   </refentryinfo>
     44   <refmeta>
     45     &dhucpackage;
     46 
     47     &dhsection;
     48   </refmeta>
     49   <refnamediv>
     50     <refname>&dhpackage;</refname>
     51 
     52     <refpurpose>Determines if an XML document is well-formed</refpurpose>
     53   </refnamediv>
     54   <refsynopsisdiv>
     55     <cmdsynopsis>
     56       <command>&dhpackage;</command>
     57 	  <arg><option>-s</option></arg>
     58 	  <arg><option>-n</option></arg>
     59 	  <arg><option>-p</option></arg>
     60 	  <arg><option>-x</option></arg>
     61 
     62 	  <arg><option>-e <replaceable>encoding</replaceable></option></arg>
     63 	  <arg><option>-w</option></arg>
     64 
     65 	  <arg><option>-d <replaceable>output-dir</replaceable></option></arg>
     66 	  <arg><option>-c</option></arg>
     67 	  <arg><option>-m</option></arg>
     68 
     69 	  <arg><option>-r</option></arg>
     70 	  <arg><option>-t</option></arg>
     71 
     72 	  <arg><option>-v</option></arg>
     73 
     74 	  <arg>file ...</arg>
     75     </cmdsynopsis>
     76   </refsynopsisdiv>
     77  
     78   <refsect1>
     79     <title>DESCRIPTION</title>
     80 
     81     <para>
     82 	<command>&dhpackage;</command> uses the Expat library to
     83 	determine if an XML document is well-formed.  It is
     84 	non-validating.
     85 	</para>
     86 
     87 	<para>
     88 	If you do not specify any files on the command-line, and you
     89 	have a recent version of <command>&dhpackage;</command>, the
     90 	input file will be read from standard input.
     91 	</para>
     92 
     93   </refsect1>
     94 
     95   <refsect1>
     96     <title>WELL-FORMED DOCUMENTS</title>
     97 
     98 	<para>
     99 	  A well-formed document must adhere to the
    100 	  following rules:
    101 	</para>
    102 
    103 	<itemizedlist>
    104       <listitem><para>
    105 	    The file begins with an XML declaration.  For instance,
    106 		<literal>&lt;?xml version="1.0" standalone="yes"?&gt;</literal>.
    107 		<emphasis>NOTE:</emphasis>
    108 		<command>&dhpackage;</command> does not currently
    109 		check for a valid XML declaration.
    110       </para></listitem>
    111       <listitem><para>
    112 		Every start tag is either empty (&lt;tag/&gt;)
    113 		or has a corresponding end tag.
    114       </para></listitem>
    115       <listitem><para>
    116 	    There is exactly one root element.  This element must contain
    117 		all other elements in the document.  Only comments, white
    118 		space, and processing instructions may come after the close
    119 		of the root element.
    120       </para></listitem>
    121       <listitem><para>
    122 		All elements nest properly.
    123       </para></listitem>
    124       <listitem><para>
    125 		All attribute values are enclosed in quotes (either single
    126 		or double).
    127       </para></listitem>
    128     </itemizedlist>
    129 
    130 	<para>
    131 	  If the document has a DTD, and it strictly complies with that
    132 	  DTD, then the document is also considered <emphasis>valid</emphasis>.
    133 	  <command>&dhpackage;</command> is a non-validating parser --
    134 	  it does not check the DTD.  However, it does support
    135 	  external entities (see the <option>-x</option> option).
    136 	</para>
    137   </refsect1>
    138 
    139   <refsect1>
    140     <title>OPTIONS</title>
    141 
    142 <para>
    143 When an option includes an argument, you may specify the argument either
    144 separately ("<option>-d</option> output") or concatenated with the
    145 option ("<option>-d</option>output").  <command>&dhpackage;</command>
    146 supports both.
    147 </para>
    148 
    149     <variablelist>
    150 
    151       <varlistentry>
    152         <term><option>-c</option></term>
    153         <listitem>
    154 		<para>
    155   If the input file is well-formed and <command>&dhpackage;</command>
    156   doesn't encounter any errors, the input file is simply copied to
    157   the output directory unchanged.
    158   This implies no namespaces (turns off <option>-n</option>) and
    159   requires <option>-d</option> to specify an output directory.
    160   		</para>
    161         </listitem>
    162       </varlistentry>
    163 
    164       <varlistentry>
    165         <term><option>-d output-dir</option></term>
    166         <listitem>
    167 		<para>
    168   Specifies a directory to contain transformed
    169   representations of the input files.
    170   By default, <option>-d</option> outputs a canonical representation
    171   (described below).
    172   You can select different output formats using <option>-c</option>
    173   and <option>-m</option>.
    174 	  </para>
    175 	  <para>
    176   The output filenames will
    177   be exactly the same as the input filenames or "STDIN" if the input is
    178   coming from standard input.  Therefore, you must be careful that the
    179   output file does not go into the same directory as the input
    180   file.  Otherwise, <command>&dhpackage;</command> will delete the
    181   input file before it generates the output file (just like running
    182   <literal>cat &lt; file &gt; file</literal> in most shells).
    183 	  </para>
    184 	  <para> 
    185   Two structurally equivalent XML documents have a byte-for-byte
    186   identical canonical XML representation.
    187   Note that ignorable white space is considered significant and
    188   is treated equivalently to data.
    189   More on canonical XML can be found at
    190   http://www.jclark.com/xml/canonxml.html .
    191 	  </para>
    192         </listitem>
    193       </varlistentry>
    194 
    195       <varlistentry>
    196         <term><option>-e encoding</option></term>
    197         <listitem>
    198 		<para>
    199    Specifies the character encoding for the document, overriding
    200    any document encoding declaration.  <command>&dhpackage;</command>
    201    supports four built-in encodings:
    202    	<literal>US-ASCII</literal>,
    203 	<literal>UTF-8</literal>,
    204 	<literal>UTF-16</literal>, and
    205 	<literal>ISO-8859-1</literal>.
    206    Also see the <option>-w</option> option.
    207 	   </para>
    208         </listitem>
    209       </varlistentry>
    210 
    211       <varlistentry>
    212         <term><option>-m</option></term>
    213         <listitem>
    214 		<para>
    215   Outputs some strange sort of XML file that completely
    216   describes the input file, including character positions.
    217   Requires <option>-d</option> to specify an output directory.
    218 	   </para>
    219         </listitem>
    220       </varlistentry>
    221 
    222       <varlistentry>
    223         <term><option>-n</option></term>
    224         <listitem>
    225 		<para>
    226   Turns on namespace processing.  (describe namespaces)
    227   <option>-c</option> disables namespaces.
    228 	   </para>
    229         </listitem>
    230       </varlistentry>
    231 
    232       <varlistentry>
    233         <term><option>-p</option></term>
    234         <listitem>
    235 		<para>
    236     Tells xmlwf to process external DTDs and parameter
    237     entities.
    238 	 </para>
    239 	 <para>
    240    Normally <command>&dhpackage;</command> never parses parameter
    241    entities.  <option>-p</option> tells it to always parse them.
    242    <option>-p</option> implies <option>-x</option>.
    243 	   </para>
    244         </listitem>
    245       </varlistentry>
    246 
    247       <varlistentry>
    248         <term><option>-r</option></term>
    249         <listitem>
    250 		<para>
    251    Normally <command>&dhpackage;</command> memory-maps the XML file
    252    before parsing; this can result in faster parsing on many
    253    platforms.
    254    <option>-r</option> turns off memory-mapping and uses normal file
    255    IO calls instead.
    256    Of course, memory-mapping is automatically turned off
    257    when reading from standard input.
    258 	   </para>
    259 		<para>
    260    Use of memory-mapping can cause some platforms to report
    261    substantially higher memory usage for
    262    <command>&dhpackage;</command>, but this appears to be a matter of
    263    the operating system reporting memory in a strange way; there is
    264    not a leak in <command>&dhpackage;</command>.
    265            </para>
    266         </listitem>
    267       </varlistentry>
    268 
    269       <varlistentry>
    270         <term><option>-s</option></term>
    271         <listitem>
    272 		<para>
    273   Prints an error if the document is not standalone. 
    274   A document is standalone if it has no external subset and no
    275   references to parameter entities.
    276 	   </para>
    277         </listitem>
    278       </varlistentry>
    279 
    280       <varlistentry>
    281         <term><option>-t</option></term>
    282         <listitem>
    283 		<para>
    284   Turns on timings.  This tells Expat to parse the entire file,
    285   but not perform any processing.
    286   This gives a fairly accurate idea of the raw speed of Expat itself
    287   without client overhead.
    288   <option>-t</option> turns off most of the output options
    289   (<option>-d</option>, <option>-m</option>, <option>-c</option>,
    290   ...).
    291 	   </para>
    292         </listitem>
    293       </varlistentry>
    294 
    295       <varlistentry>
    296         <term><option>-v</option></term>
    297         <listitem>
    298 		<para>
    299   Prints the version of the Expat library being used, including some
    300   information on the compile-time configuration of the library, and
    301   then exits.
    302 	   </para>
    303         </listitem>
    304       </varlistentry>
    305 
    306       <varlistentry>
    307         <term><option>-w</option></term>
    308         <listitem>
    309 		<para>
    310   Enables support for Windows code pages.
    311   Normally, <command>&dhpackage;</command> will throw an error if it
    312   runs across an encoding that it is not equipped to handle itself.  With
    313   <option>-w</option>, &dhpackage; will try to use a Windows code
    314   page.  See also <option>-e</option>.
    315 	   </para>
    316         </listitem>
    317       </varlistentry>
    318 
    319       <varlistentry>
    320         <term><option>-x</option></term>
    321         <listitem>
    322 		<para>
    323   Turns on parsing external entities.
    324   </para>
    325 <para>
    326   Non-validating parsers are not required to resolve external
    327   entities, or even expand entities at all.
    328   Expat always expands internal entities (?),
    329   but external entity parsing must be enabled explicitly.
    330   </para>
    331   <para>
    332   External entities are simply entities that obtain their
    333   data from outside the XML file currently being parsed.
    334   </para>
    335   <para>
    336   This is an example of an internal entity:
    337 <literallayout>
    338 &lt;!ENTITY vers '1.0.2'&gt;
    339 </literallayout>
    340   </para>
    341   <para>
    342   And here are some examples of external entities:
    343 
    344 <literallayout>
    345 &lt;!ENTITY header SYSTEM "header-&amp;vers;.xml"&gt;  (parsed)
    346 &lt;!ENTITY logo SYSTEM "logo.png" PNG&gt;         (unparsed)
    347 </literallayout>
    348 
    349 	   </para>
    350         </listitem>
    351       </varlistentry>
    352 
    353       <varlistentry>
    354         <term><option>--</option></term>
    355         <listitem>
    356 		<para>
    357     (Two hyphens.)
    358     Terminates the list of options.  This is only needed if a filename
    359     starts with a hyphen.  For example:
    360 	   </para>
    361 <literallayout>
    362 &dhpackage; -- -myfile.xml
    363 </literallayout>
    364 		<para>
    365     will run <command>&dhpackage;</command> on the file
    366     <filename>-myfile.xml</filename>.
    367 	   </para>
    368         </listitem>
    369       </varlistentry>
    370     </variablelist>
    371 
    372 	<para>
    373     Older versions of <command>&dhpackage;</command> do not support
    374     reading from standard input.
    375 	</para>
    376   </refsect1>
    377 
    378   <refsect1>
    379   <title>OUTPUT</title>
    380     <para>
    381 	If an input file is not well-formed,
    382 	<command>&dhpackage;</command> prints a single line describing
    383 	the problem to standard output.  If a file is well-formed,
    384 	<command>&dhpackage;</command> outputs nothing.
    385 	Note that the result code is <emphasis>not</emphasis> set.
    386 	</para>
    387   </refsect1>
    388   
    389   <refsect1>
    390     <title>BUGS</title>
    391 	<para>
    392 	According to the W3C standard, an XML file without a
    393 	declaration at the beginning is not considered well-formed.
    394 	However, <command>&dhpackage;</command> allows this to pass.
    395 	</para>
    396 	<para>
    397 	<command>&dhpackage;</command> returns a result code of 0 (no error),
    398 	even if the file is not well-formed.  There is no good way for
    399 	a program to use <command>&dhpackage;</command> to quickly
    400 	check a file -- it must parse <command>&dhpackage;</command>'s
    401 	standard output.
    402 	</para>
    403 	<para>
    404 	The errors should go to standard error, not standard output.
    405 	</para>
    406 	<para>
    407 	There should be a way to get <option>-d</option> to send its
    408 	output to standard output rather than forcing the user to send
    409 	it to a file.
    410 	</para>
    411 	<para>
    412 	I have no idea why anyone would want to use the
    413 	<option>-d</option>, <option>-c</option>, and
    414 	<option>-m</option> options.  If someone could explain it to
    415 	me, I'd like to add this information to this manpage.
    416 	</para>
    417   </refsect1>
    418 
    419   <refsect1>
    420     <title>ALTERNATIVES</title>
    421 	<para>
    422 	  Here are some XML validators on the web:
    423 
    424 <literallayout>
    425 http://www.hcrc.ed.ac.uk/~richard/xml-check.html
    426 http://www.stg.brown.edu/service/xmlvalid/
    427 http://www.scripting.com/frontier5/xml/code/xmlValidator.html
    428 http://www.xml.com/pub/a/tools/ruwf/check.html
    429 </literallayout>
    430 
    431 		 </para>
    432   </refsect1>
    433 
    434   <refsect1>
    435     <title>SEE ALSO</title>
    436 	<para>
    437 
    438 <literallayout>
    439 The Expat home page:        http://www.libexpat.org/
    440 The W3 XML specification:   http://www.w3.org/TR/REC-xml
    441 </literallayout>
    442 
    443 	</para>
    444   </refsect1>
    445 
    446   <refsect1>
    447     <title>AUTHOR</title>
    448     <para>
    449 	  This manual page was written by &dhusername; &dhemail; for
    450       the &debian; system (but may be used by others).  Permission is
    451       granted to copy, distribute and/or modify this document under
    452       the terms of the <acronym>GNU</acronym> Free Documentation
    453       License, Version 1.1.
    454 	</para>
    455   </refsect1>
    456 </refentry>
    457 
    458 <!-- Keep this comment at the end of the file
    459 Local variables:
    460 mode: sgml
    461 sgml-omittag:t
    462 sgml-shorttag:t
    463 sgml-minimize-attributes:nil
    464 sgml-always-quote-attributes:t
    465 sgml-indent-step:2
    466 sgml-indent-data:t
    467 sgml-parent-document:nil
    468 sgml-default-dtd-file:nil
    469 sgml-exposed-tags:nil
    470 sgml-local-catalogs:nil
    471 sgml-local-ecat-files:nil
    472 End:
    473 -->
    474