Home | History | Annotate | Download | only in doc
      1 <!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN" [
      2 
      3 <!-- Process this file with docbook-to-man to generate an nroff manual
      4      page: `docbook-to-man manpage.sgml > manpage.1'.  You may view
      5      the manual page with: `docbook-to-man manpage.sgml | nroff -man |
      6      less'.  A typical entry in a Makefile or Makefile.am is:
      7 
      8 manpage.1: manpage.sgml
      9 	docbook-to-man $< > $@
     10   -->
     11 
     12   <!-- Fill in your name for FIRSTNAME and SURNAME. -->
     13   <!ENTITY dhfirstname "<firstname>Scott</firstname>">
     14   <!ENTITY dhsurname   "<surname>Bronson</surname>">
     15   <!-- Please adjust the date whenever revising the manpage. -->
     16   <!ENTITY dhdate      "<date>March 11, 2016</date>">
     17   <!-- SECTION should be 1-8, maybe w/ subsection; other parameters are
     18        allowed: see man(7), man(1). -->
     19   <!ENTITY dhsection   "<manvolnum>1</manvolnum>">
     20   <!ENTITY dhemail     "<email>bronson@rinspin.com</email>">
     21   <!ENTITY dhusername  "Scott Bronson">
     22   <!ENTITY dhucpackage "<refentrytitle>XMLWF</refentrytitle>">
     23   <!ENTITY dhpackage   "xmlwf">
     24 
     25   <!ENTITY debian      "<productname>Debian GNU/Linux</productname>">
     26   <!ENTITY gnu         "<acronym>GNU</acronym>">
     27 ]>
     28 
     29 <refentry>
     30   <refentryinfo>
     31     <address>
     32       &dhemail;
     33     </address>
     34     <author>
     35       &dhfirstname;
     36       &dhsurname;
     37     </author>
     38     <copyright>
     39       <year>2001</year>
     40       <holder>&dhusername;</holder>
     41     </copyright>
     42     &dhdate;
     43   </refentryinfo>
     44   <refmeta>
     45     &dhucpackage;
     46 
     47     &dhsection;
     48   </refmeta>
     49   <refnamediv>
     50     <refname>&dhpackage;</refname>
     51 
     52     <refpurpose>Determines if an XML document is well-formed</refpurpose>
     53   </refnamediv>
     54   <refsynopsisdiv>
     55     <cmdsynopsis>
     56       <command>&dhpackage;</command>
     57 	  <arg><option>-s</option></arg>
     58 	  <arg><option>-n</option></arg>
     59 	  <arg><option>-p</option></arg>
     60 	  <arg><option>-x</option></arg>
     61 
     62 	  <arg><option>-e <replaceable>encoding</replaceable></option></arg>
     63 	  <arg><option>-w</option></arg>
     64 
     65 	  <arg><option>-d <replaceable>output-dir</replaceable></option></arg>
     66 	  <arg><option>-c</option></arg>
     67 	  <arg><option>-m</option></arg>
     68 
     69 	  <arg><option>-r</option></arg>
     70 	  <arg><option>-t</option></arg>
     71 
     72 	  <arg><option>-v</option></arg>
     73 
     74 	  <arg>file ...</arg>
     75     </cmdsynopsis>
     76   </refsynopsisdiv>
     77  
     78   <refsect1>
     79     <title>DESCRIPTION</title>
     80 
     81     <para>
     82 	<command>&dhpackage;</command> uses the Expat library to
     83 	determine if an XML document is well-formed.  It is
     84 	non-validating.
     85 	</para>
     86 
     87 	<para>
     88 	If you do not specify any files on the command-line, and you
     89 	have a recent version of <command>&dhpackage;</command>, the
     90 	input file will be read from standard input.
     91 	</para>
     92 
     93   </refsect1>
     94 
     95   <refsect1>
     96     <title>WELL-FORMED DOCUMENTS</title>
     97 
     98 	<para>
     99 	  A well-formed document must adhere to the
    100 	  following rules:
    101 	</para>
    102 
    103 	<itemizedlist>
    104       <listitem><para>
    105 	    The file begins with an XML declaration.  For instance,
    106 		<literal>&lt;?xml version="1.0" standalone="yes"?&gt;</literal>.
    107 		<emphasis>NOTE:</emphasis>
    108 		<command>&dhpackage;</command> does not currently
    109 		check for a valid XML declaration.
    110       </para></listitem>
    111       <listitem><para>
    112 		Every start tag is either empty (&lt;tag/&gt;)
    113 		or has a corresponding end tag.
    114       </para></listitem>
    115       <listitem><para>
    116 	    There is exactly one root element.  This element must contain
    117 		all other elements in the document.  Only comments, white
    118 		space, and processing instructions may come after the close
    119 		of the root element.
    120       </para></listitem>
    121       <listitem><para>
    122 		All elements nest properly.
    123       </para></listitem>
    124       <listitem><para>
    125 		All attribute values are enclosed in quotes (either single
    126 		or double).
    127       </para></listitem>
    128     </itemizedlist>
    129 
    130 	<para>
    131 	  If the document has a DTD, and it strictly complies with that
    132 	  DTD, then the document is also considered <emphasis>valid</emphasis>.
    133 	  <command>&dhpackage;</command> is a non-validating parser --
    134 	  it does not check the DTD.  However, it does support
    135 	  external entities (see the <option>-x</option> option).
    136 	</para>
    137   </refsect1>
    138 
    139   <refsect1>
    140     <title>OPTIONS</title>
    141 
    142 <para>
    143 When an option includes an argument, you may specify the argument either
    144 separately ("<option>-d</option> output") or concatenated with the
    145 option ("<option>-d</option>output").  <command>&dhpackage;</command>
    146 supports both.
    147 </para>
    148 
    149     <variablelist>
    150 
    151       <varlistentry>
    152         <term><option>-c</option></term>
    153         <listitem>
    154 		<para>
    155   If the input file is well-formed and <command>&dhpackage;</command>
    156   doesn't encounter any errors, the input file is simply copied to
    157   the output directory unchanged.
    158   This implies no namespaces (turns off <option>-n</option>) and
    159   requires <option>-d</option> to specify an output directory.
    160   		</para>
    161         </listitem>
    162       </varlistentry>
    163 
    164       <varlistentry>
    165         <term><option>-d output-dir</option></term>
    166         <listitem>
    167 		<para>
    168   Specifies a directory to contain transformed
    169   representations of the input files.
    170   By default, <option>-d</option> outputs a canonical representation
    171   (described below).
    172   You can select different output formats using <option>-c</option>
    173   and <option>-m</option>.
    174 	  </para>
    175 	  <para>
    176   The output filenames will
    177   be exactly the same as the input filenames or "STDIN" if the input is
    178   coming from standard input.  Therefore, you must be careful that the
    179   output file does not go into the same directory as the input
    180   file.  Otherwise, <command>&dhpackage;</command> will delete the
    181   input file before it generates the output file (just like running
    182   <literal>cat &lt; file &gt; file</literal> in most shells).
    183 	  </para>
    184 	  <para> 
    185   Two structurally equivalent XML documents have a byte-for-byte
    186   identical canonical XML representation.
    187   Note that ignorable white space is considered significant and
    188   is treated equivalently to data.
    189   More on canonical XML can be found at
    190   http://www.jclark.com/xml/canonxml.html .
    191 	  </para>
    192         </listitem>
    193       </varlistentry>
    194 
    195       <varlistentry>
    196         <term><option>-e encoding</option></term>
    197         <listitem>
    198 		<para>
    199    Specifies the character encoding for the document, overriding
    200    any document encoding declaration.  <command>&dhpackage;</command>
    201    supports four built-in encodings:
    202    	<literal>US-ASCII</literal>,
    203 	<literal>UTF-8</literal>,
    204 	<literal>UTF-16</literal>, and
    205 	<literal>ISO-8859-1</literal>.
    206    Also see the <option>-w</option> option.
    207 	   </para>
    208         </listitem>
    209       </varlistentry>
    210 
    211       <varlistentry>
    212         <term><option>-m</option></term>
    213         <listitem>
    214 		<para>
    215   Outputs some strange sort of XML file that completely
    216   describes the input file, including character positions.
    217   Requires <option>-d</option> to specify an output directory.
    218 	   </para>
    219         </listitem>
    220       </varlistentry>
    221 
    222       <varlistentry>
    223         <term><option>-n</option></term>
    224         <listitem>
    225 		<para>
    226   Turns on namespace processing.  (describe namespaces)
    227   <option>-c</option> disables namespaces.
    228 	   </para>
    229         </listitem>
    230       </varlistentry>
    231 
    232       <varlistentry>
    233         <term><option>-p</option></term>
    234         <listitem>
    235 		<para>
    236     Tells <command>&dhpackage;</command> to process external DTDs and parameter
    237     entities.
    238 	 </para>
    239 	 <para>
    240    Normally <command>&dhpackage;</command> never parses parameter
    241    entities.  <option>-p</option> tells it to always parse them.
    242    <option>-p</option> implies <option>-x</option>.
    243 	   </para>
    244         </listitem>
    245       </varlistentry>
    246 
    247       <varlistentry>
    248         <term><option>-r</option></term>
    249         <listitem>
    250 		<para>
    251    Normally <command>&dhpackage;</command> memory-maps the XML file
    252    before parsing; this can result in faster parsing on many
    253    platforms.
    254    <option>-r</option> turns off memory-mapping and uses normal file
    255    IO calls instead.
    256    Of course, memory-mapping is automatically turned off
    257    when reading from standard input.
    258 	   </para>
    259 		<para>
    260    Use of memory-mapping can cause some platforms to report
    261    substantially higher memory usage for
    262    <command>&dhpackage;</command>, but this appears to be a matter of
    263    the operating system reporting memory in a strange way; there is
    264    not a leak in <command>&dhpackage;</command>.
    265            </para>
    266         </listitem>
    267       </varlistentry>
    268 
    269       <varlistentry>
    270         <term><option>-s</option></term>
    271         <listitem>
    272 		<para>
    273   Prints an error if the document is not standalone. 
    274   A document is standalone if it has no external subset and no
    275   references to parameter entities.
    276 	   </para>
    277         </listitem>
    278       </varlistentry>
    279 
    280       <varlistentry>
    281         <term><option>-t</option></term>
    282         <listitem>
    283 		<para>
    284   Turns on timings.  This tells Expat to parse the entire file,
    285   but not perform any processing.
    286   This gives a fairly accurate idea of the raw speed of Expat itself
    287   without client overhead.
    288   <option>-t</option> turns off most of the output options
    289   (<option>-d</option>, <option>-m</option>, <option>-c</option>, ...).
    290 	   </para>
    291         </listitem>
    292       </varlistentry>
    293 
    294       <varlistentry>
    295         <term><option>-v</option></term>
    296         <listitem>
    297 		<para>
    298   Prints the version of the Expat library being used, including some
    299   information on the compile-time configuration of the library, and
    300   then exits.
    301 	   </para>
    302         </listitem>
    303       </varlistentry>
    304 
    305       <varlistentry>
    306         <term><option>-w</option></term>
    307         <listitem>
    308 		<para>
    309   Enables support for Windows code pages.
    310   Normally, <command>&dhpackage;</command> will throw an error if it
    311   runs across an encoding that it is not equipped to handle itself.  With
    312   <option>-w</option>, <command>&dhpackage;</command> will try to use a
    313   Windows code page.  See also <option>-e</option>.
    314 	   </para>
    315         </listitem>
    316       </varlistentry>
    317 
    318       <varlistentry>
    319         <term><option>-x</option></term>
    320         <listitem>
    321 		<para>
    322   Turns on parsing external entities.
    323   </para>
    324 <para>
    325   Non-validating parsers are not required to resolve external
    326   entities, or even expand entities at all.
    327   Expat always expands internal entities (?),
    328   but external entity parsing must be enabled explicitly.
    329   </para>
    330   <para>
    331   External entities are simply entities that obtain their
    332   data from outside the XML file currently being parsed.
    333   </para>
    334   <para>
    335   This is an example of an internal entity:
    336 <literallayout>
    337 &lt;!ENTITY vers '1.0.2'&gt;
    338 </literallayout>
    339   </para>
    340   <para>
    341   And here are some examples of external entities:
    342 
    343 <literallayout>
    344 &lt;!ENTITY header SYSTEM "header-&amp;vers;.xml"&gt;  (parsed)
    345 &lt;!ENTITY logo SYSTEM "logo.png" PNG&gt;         (unparsed)
    346 </literallayout>
    347 
    348 	   </para>
    349         </listitem>
    350       </varlistentry>
    351 
    352       <varlistentry>
    353         <term><option>--</option></term>
    354         <listitem>
    355 		<para>
    356     (Two hyphens.)
    357     Terminates the list of options.  This is only needed if a filename
    358     starts with a hyphen.  For example:
    359 	   </para>
    360 <literallayout>
    361 &dhpackage; -- -myfile.xml
    362 </literallayout>
    363 		<para>
    364     will run <command>&dhpackage;</command> on the file
    365     <filename>-myfile.xml</filename>.
    366 	   </para>
    367         </listitem>
    368       </varlistentry>
    369     </variablelist>
    370 
    371 	<para>
    372     Older versions of <command>&dhpackage;</command> do not support
    373     reading from standard input.
    374 	</para>
    375   </refsect1>
    376 
    377   <refsect1>
    378   <title>OUTPUT</title>
    379     <para>
    380 	If an input file is not well-formed,
    381 	<command>&dhpackage;</command> prints a single line describing
    382 	the problem to standard output.  If a file is well-formed,
    383 	<command>&dhpackage;</command> outputs nothing.
    384 	Note that the result code is <emphasis>not</emphasis> set.
    385 	</para>
    386   </refsect1>
    387   
    388   <refsect1>
    389     <title>BUGS</title>
    390 	<para>
    391 	<command>&dhpackage;</command> returns a result code of 0 (no error),
    392 	even if the file is not well-formed.  There is no good way for
    393 	a program to use <command>&dhpackage;</command> to quickly
    394 	check a file -- it must parse <command>&dhpackage;</command>'s
    395 	standard output.
    396 	</para>
    397 	<para>
    398 	The errors should go to standard error, not standard output.
    399 	</para>
    400 	<para>
    401 	There should be a way to get <option>-d</option> to send its
    402 	output to standard output rather than forcing the user to send
    403 	it to a file.
    404 	</para>
    405 	<para>
    406 	I have no idea why anyone would want to use the
    407 	<option>-d</option>, <option>-c</option>, and
    408 	<option>-m</option> options.  If someone could explain it to
    409 	me, I'd like to add this information to this manpage.
    410 	</para>
    411   </refsect1>
    412 
    413   <refsect1>
    414     <title>ALTERNATIVES</title>
    415 	<para>
    416 	  Here are some XML validators on the web:
    417 
    418 <literallayout>
    419 http://www.hcrc.ed.ac.uk/~richard/xml-check.html
    420 http://www.stg.brown.edu/service/xmlvalid/
    421 http://www.scripting.com/frontier5/xml/code/xmlValidator.html
    422 http://www.xml.com/pub/a/tools/ruwf/check.html
    423 </literallayout>
    424 
    425 		 </para>
    426   </refsect1>
    427 
    428   <refsect1>
    429     <title>SEE ALSO</title>
    430 	<para>
    431 
    432 <literallayout>
    433 The Expat home page:        http://www.libexpat.org/
    434 The W3 XML specification:   http://www.w3.org/TR/REC-xml
    435 </literallayout>
    436 
    437 	</para>
    438   </refsect1>
    439 
    440   <refsect1>
    441     <title>AUTHOR</title>
    442     <para>
    443 	  This manual page was written by &dhusername; &dhemail; for
    444       the &debian; system (but may be used by others).  Permission is
    445       granted to copy, distribute and/or modify this document under
    446       the terms of the <acronym>GNU</acronym> Free Documentation
    447       License, Version 1.1.
    448 	</para>
    449   </refsect1>
    450 </refentry>
    451 
    452 <!-- Keep this comment at the end of the file
    453 Local variables:
    454 mode: sgml
    455 sgml-omittag:t
    456 sgml-shorttag:t
    457 sgml-minimize-attributes:nil
    458 sgml-always-quote-attributes:t
    459 sgml-indent-step:2
    460 sgml-indent-data:t
    461 sgml-parent-document:nil
    462 sgml-default-dtd-file:nil
    463 sgml-exposed-tags:nil
    464 sgml-local-catalogs:nil
    465 sgml-local-ecat-files:nil
    466 End:
    467 -->
    468