Home | History | Annotate | Download | only in libxmlrpg
      1       * Summary: interface for the encoding conversion functions
      2       * Description: interface for the encoding conversion functions needed for
      3       *              XML basic encoding and iconv() support.
      4       *
      5       * Related specs are
      6       * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
      7       * [ISO-10646]    UTF-8 and UTF-16 in Annexes
      8       * [ISO-8859-1]   ISO Latin-1 characters codes.
      9       * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
     10       *                Worldwide Character Encoding -- Version 1.0", Addison-
     11       *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
     12       *                described in Unicode Technical Report #4.
     13       * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
     14       *                Information Interchange, ANSI X3.4-1986.
     15       *
     16       * Copy: See Copyright for the status of this software.
     17       *
     18       * Author: Patrick Monnerat <pm (a] datasphere.ch>, DATASPHERE S.A.
     19 
     20       /if not defined(XML_CHAR_ENCODING_H__)
     21       /define XML_CHAR_ENCODING_H__
     22 
     23       /include "libxmlrpg/xmlversion"
     24       /include "libxmlrpg/xmlTypesC"
     25 
     26       * xmlCharEncoding:
     27       *
     28       * Predefined values for some standard encodings.
     29       * Libxml does not do beforehand translation on UTF8 and ISOLatinX.
     30       * It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default.
     31       *
     32       * Anything else would have to be translated to UTF8 before being
     33       * given to the parser itself. The BOM for UTF16 and the encoding
     34       * declaration are looked at and a converter is looked for at that
     35       * point. If not found the parser stops here as asked by the XML REC. A
     36       * converter can be registered by the user using
     37       * xmlRegisterCharEncodingHandler but the current form doesn't allow
     38       * stateful transcoding (a serious problem agreed !). If iconv has been
     39       * found it will be used automatically and allow stateful transcoding,
     40       * the simplest is then to be sure to enable iconv and to provide iconv
     41       * libs for the encoding support needed.
     42       *
     43       * Note that the generic "UTF-16" is not a predefined value.  Instead, only
     44       * the specific UTF-16LE and UTF-16BE are present.
     45 
              * Declaration notes: xmlCharEncoding itself is a based() template
              * with like(xmlCenum); it is referenced through like() by the
              * prototypes in this file.  The values are the named constants
              * below, running from -1 to 22.
              * NOTE(review): XML_CHAR_ENCODING_ERROR (-1) and
              * XML_CHAR_ENCODING_NONE (0) carry the same margin text; confirm
              * whether the -1 description should differ.
     46      d xmlCharEncoding...
     47      d                 s                   based(######typedef######)
     48      d                                     like(xmlCenum)
     49      d  XML_CHAR_ENCODING_ERROR...                                              No encoding detected
     50      d                 c                   -1
     51      d  XML_CHAR_ENCODING_NONE...                                               No encoding detected
     52      d                 c                   0
     53      d  XML_CHAR_ENCODING_UTF8...                                               UTF-8
     54      d                 c                   1
     55      d  XML_CHAR_ENCODING_UTF16LE...                                            UTF-16 little endian
     56      d                 c                   2
     57      d  XML_CHAR_ENCODING_UTF16BE...                                            UTF-16 big endian
     58      d                 c                   3
     59      d  XML_CHAR_ENCODING_UCS4LE...                                             UCS-4 little endian
     60      d                 c                   4
     61      d  XML_CHAR_ENCODING_UCS4BE...                                             UCS-4 big endian
     62      d                 c                   5
     63      d  XML_CHAR_ENCODING_EBCDIC...                                             EBCDIC uh!
     64      d                 c                   6
     65      d  XML_CHAR_ENCODING_UCS4_2143...                                          UCS-4 unusual order
     66      d                 c                   7
     67      d  XML_CHAR_ENCODING_UCS4_3412...                                          UCS-4 unusual order
     68      d                 c                   8
     69      d  XML_CHAR_ENCODING_UCS2...                                               UCS-2
     70      d                 c                   9
     71      d  XML_CHAR_ENCODING_8859_1...                                             ISO-8859-1 ISOLatin1
     72      d                 c                   10
     73      d  XML_CHAR_ENCODING_8859_2...                                             ISO-8859-2 ISOLatin2
     74      d                 c                   11
     75      d  XML_CHAR_ENCODING_8859_3...                                             ISO-8859-3
     76      d                 c                   12
     77      d  XML_CHAR_ENCODING_8859_4...                                             ISO-8859-4
     78      d                 c                   13
     79      d  XML_CHAR_ENCODING_8859_5...                                             ISO-8859-5
     80      d                 c                   14
     81      d  XML_CHAR_ENCODING_8859_6...                                             ISO-8859-6
     82      d                 c                   15
     83      d  XML_CHAR_ENCODING_8859_7...                                             ISO-8859-7
     84      d                 c                   16
     85      d  XML_CHAR_ENCODING_8859_8...                                             ISO-8859-8
     86      d                 c                   17
     87      d  XML_CHAR_ENCODING_8859_9...                                             ISO-8859-9
     88      d                 c                   18
     89      d  XML_CHAR_ENCODING_2022_JP...                                            ISO-2022-JP
     90      d                 c                   19
     91      d  XML_CHAR_ENCODING_SHIFT_JIS...                                          Shift_JIS
     92      d                 c                   20
     93      d  XML_CHAR_ENCODING_EUC_JP...                                             EUC-JP
     94      d                 c                   21
     95      d  XML_CHAR_ENCODING_ASCII...                                              Pure ASCII
     96      d                 c                   22
     97 
     98       * xmlCharEncodingInputFunc:
     99       * @out:  a pointer to an array of bytes to store the UTF-8 result
    100       * @outlen:  the length of @out
    101       * @in:  a pointer to an array of chars in the original encoding
    102       * @inlen:  the length of @in
    103       *
    104       * Take a block of chars in the original encoding and try to convert
    105       * it to an UTF-8 block of chars out.
    106       *
    107       * Returns the number of bytes written, -1 if lack of space, or -2
    108       *     if the transcoding failed.
    109       * The value of @inlen after return is the number of octets consumed
    110       *     if the return value is positive, else unpredictable.
    111       * The value of @outlen after return is the number of octets produced.
    112 
              * Declared as a procedure pointer template (procptr).  The
              * parameter list described in the comment block above is not
              * expressed in this declaration.
    113      d xmlCharEncodingInputFunc...
    114      d                 s               *   based(######typedef######)
    115      d                                     procptr
    116 
    117       * xmlCharEncodingOutputFunc:
    118       * @out:  a pointer to an array of bytes to store the result
    119       * @outlen:  the length of @out
    120       * @in:  a pointer to an array of UTF-8 chars
    121       * @inlen:  the length of @in
    122       *
    123       * Take a block of UTF-8 chars in and try to convert it to another
    124       * encoding.
    125       * Note: a first call designed to produce heading info is called with
    126       * in = NULL. If stateful this should also initialize the encoder state.
    127       *
    128       * Returns the number of bytes written, -1 if lack of space, or -2
    129       *     if the transcoding failed.
    130       * The value of @inlen after return is the number of octets consumed
    131       *     if the return value is positive, else unpredictable.
    132       * The value of @outlen after return is the number of octets produced.
    133 
              * Declared as a procedure pointer template (procptr).  The
              * parameter list described in the comment block above is not
              * expressed in this declaration.
    134      d xmlCharEncodingOutputFunc...
    135      d                 s               *   based(######typedef######)
    136      d                                     procptr
    137 
    138       * Block defining the handlers for non UTF-8 encodings.
    139       * If iconv or ICU support is enabled, each adds two extra fields.
    140 
              * uconv_t: qualified, aligned data structure template made of two
              * pointers (both noted as UConverter * in the margin).  It is only
              * declared when LIBXML_ICU_ENABLED is defined.
    141       /if defined(LIBXML_ICU_ENABLED)
    142      d uconv_t         ds                  based(######typedef######)
    143      d                                     align qualified
    144      d  uconv                          *                                        UConverter *
    145      d  utf8                           *                                        UConverter *
    146       /endif
    147 
              * xmlCharEncodingHandlerPtr: pointer type for handler structures.
              * The same name is the basing pointer of the xmlCharEncodingHandler
              * data structure declared next.
    148      d xmlCharEncodingHandlerPtr...
    149      d                 s               *   based(######typedef######)
    150 
              * xmlCharEncodingHandler: qualified, aligned structure based on
              * xmlCharEncodingHandlerPtr.
              *   name            pointer (char * per the margin note)
              *   input, output   conversion procedure pointers
              *   iconv_in/out    present only if LIBXML_ICONV_ENABLED is defined
              *   uconv_in/out    present only if LIBXML_ICU_ENABLED is defined
              * The layout therefore varies with those two conditions.
              * NOTE(review): presumably they must match the options the
              * library itself was built with -- confirm.
    151      d xmlCharEncodingHandler...
    152      d                 ds                  based(xmlCharEncodingHandlerPtr)
    153      d                                     align qualified
    154      d  name                           *                                        char *
    155      d  input                              like(xmlCharEncodingInputFunc)
    156      d  output                             like(xmlCharEncodingOutputFunc)
    157       *
    158       /if defined(LIBXML_ICONV_ENABLED)
    159      d  iconv_in                       *                                        iconv_t
    160      d  iconv_out                      *                                        iconv_t
    161       /endif                                                                    LIBXML_ICONV_ENABLED
    162       *
    163       /if defined(LIBXML_ICU_ENABLED)
    164      d  uconv_in                       *                                        uconv_t *
    165      d  uconv_out                      *                                        uconv_t *
    166       /endif                                                                    LIBXML_ICU_ENABLED
    167 
    168       /include "libxmlrpg/tree"
    169 
    170       * Interfaces for encoding handlers.
    171 
              * Prototype bound to the external procedure
              * 'xmlInitCharEncodingHandlers'.  No parameters, no return value.
    172      d xmlInitCharEncodingHandlers...
    173      d                 pr                  extproc(
    174      d                                      'xmlInitCharEncodingHandlers')
    175 
              * Prototype bound to the external procedure
              * 'xmlCleanupCharEncodingHandlers'.  No parameters, no return value.
    176      d xmlCleanupCharEncodingHandlers...
    177      d                 pr                  extproc(
    178      d                                      'xmlCleanupCharEncodingHandlers')
    179 
              * Prototype bound to the external procedure
              * 'xmlRegisterCharEncodingHandler'.  No return value.
              *   handler   handler pointer, passed by value
    180      d xmlRegisterCharEncodingHandler...
    181      d                 pr                  extproc(
    182      d                                      'xmlRegisterCharEncodingHandler')
    183      d  handler                            value like(xmlCharEncodingHandlerPtr)
    184 
              * Prototype bound to 'xmlGetCharEncodingHandler'.
              * Returns a handler pointer (xmlCharEncodingHandlerPtr).
              *   enc   xmlCharEncoding value, passed by value
    185      d xmlGetCharEncodingHandler...
    186      d                 pr                  extproc('xmlGetCharEncodingHandler')
    187      d                                     like(xmlCharEncodingHandlerPtr)
    188      d  enc                                value like(xmlCharEncoding)
    189 
              * Prototype bound to 'xmlFindCharEncodingHandler'.
              * Returns a handler pointer (xmlCharEncodingHandlerPtr).
              *   name   pointer by value; options(*string) also accepts a
              *          character expression (const char * on the C side)
    190      d xmlFindCharEncodingHandler...
    191      d                 pr                  extproc('xmlFindCharEncodingHandler')
    192      d                                     like(xmlCharEncodingHandlerPtr)
    193      d  name                           *   value options(*string)               const char *
    194 
              * Prototype bound to 'xmlNewCharEncodingHandler'.
              * Returns a handler pointer (xmlCharEncodingHandlerPtr).
              *   name     pointer by value; options(*string) also accepts a
              *            character expression (const char * on the C side)
              *   input    xmlCharEncodingInputFunc procedure pointer, by value
              *   output   xmlCharEncodingOutputFunc procedure pointer, by value
    195      d xmlNewCharEncodingHandler...
    196      d                 pr                  extproc('xmlNewCharEncodingHandler')
    197      d                                     like(xmlCharEncodingHandlerPtr)
    198      d  name                           *   value options(*string)               const char *
    199      d  input                              value like(xmlCharEncodingInputFunc)
    200      d  output                             value like(xmlCharEncodingOutputFunc)
    201 
    202       * Interfaces for encoding names and aliases.
    203 
              * Prototype bound to 'xmlAddEncodingAlias'.  Returns an xmlCint.
              *   name    pointer by value, options(*string) (const char *)
              *   alias   pointer by value, options(*string) (const char *)
    204      d xmlAddEncodingAlias...
    205      d                 pr                  extproc('xmlAddEncodingAlias')
    206      d                                     like(xmlCint)
    207      d  name                           *   value options(*string)               const char *
    208      d  alias                          *   value options(*string)               const char *
    209 
              * Prototype bound to 'xmlDelEncodingAlias'.  Returns an xmlCint.
              *   alias   pointer by value, options(*string) (const char *)
    210      d xmlDelEncodingAlias...
    211      d                 pr                  extproc('xmlDelEncodingAlias')
    212      d                                     like(xmlCint)
    213      d  alias                          *   value options(*string)               const char *
    214 
              * Prototype bound to 'xmlGetEncodingAlias'.  Returns a pointer
              * (const char * per the margin note).
              *   alias   pointer by value, options(*string) (const char *)
    215      d xmlGetEncodingAlias...
    216      d                 pr              *   extproc('xmlGetEncodingAlias')       const char *
    217      d  alias                          *   value options(*string)               const char *
    218 
              * Prototype bound to 'xmlCleanupEncodingAliases'.
              * No parameters, no return value.
    219      d xmlCleanupEncodingAliases...
    220      d                 pr                  extproc('xmlCleanupEncodingAliases')
    221 
              * Prototype bound to 'xmlParseCharEncoding'.
              * Returns an xmlCharEncoding value.
              *   name   pointer by value, options(*string) (const char *)
    222      d xmlParseCharEncoding...
    223      d                 pr                  extproc('xmlParseCharEncoding')
    224      d                                     like(xmlCharEncoding)
    225      d  name                           *   value options(*string)               const char *
    226 
              * Prototype bound to 'xmlGetCharEncodingName'.  Returns a pointer
              * (const char * per the margin note).
              *   enc   xmlCharEncoding value, passed by value
    227      d xmlGetCharEncodingName...
    228      d                 pr              *   extproc('xmlGetCharEncodingName')    const char *
    229      d  enc                                value like(xmlCharEncoding)
    230 
    231       * Interfaces directly used by the parsers.
    232 
              * Prototype bound to 'xmlDetectCharEncoding'.
              * Returns an xmlCharEncoding value.
              *   in    pointer by value, options(*string)
              *         (const unsigned char * per the margin note)
              *   len   xmlCint, passed by value
    233      d xmlDetectCharEncoding...
    234      d                 pr                  extproc('xmlDetectCharEncoding')
    235      d                                     like(xmlCharEncoding)
    236      d  in                             *   value options(*string)               const unsigned char*
    237      d  len                                value like(xmlCint)
    238 
              * Prototype bound to 'xmlCharEncOutFunc'.  Returns an xmlCint.
              *   handler   xmlCharEncodingHandler structure, by reference
              *   out       xmlBufferPtr, passed by value
              *   in        xmlBufferPtr, passed by value
              * xmlBufferPtr is not declared in this member (presumably it
              * comes from the libxmlrpg/tree include -- confirm).
    239      d xmlCharEncOutFunc...
    240      d                 pr                  extproc('xmlCharEncOutFunc')
    241      d                                     like(xmlCint)
    242      d  handler                            likeds(xmlCharEncodingHandler)
    243      d  out                                value like(xmlBufferPtr)
    244      d  in                                 value like(xmlBufferPtr)
    245 
              * Prototype bound to 'xmlCharEncInFunc'.  Returns an xmlCint.
              *   handler   xmlCharEncodingHandler structure, by reference
              *   out       xmlBufferPtr, passed by value
              *   in        xmlBufferPtr, passed by value
              * xmlBufferPtr is not declared in this member (presumably it
              * comes from the libxmlrpg/tree include -- confirm).
    246      d xmlCharEncInFunc...
    247      d                 pr                  extproc('xmlCharEncInFunc')
    248      d                                     like(xmlCint)
    249      d  handler                            likeds(xmlCharEncodingHandler)
    250      d  out                                value like(xmlBufferPtr)
    251      d  in                                 value like(xmlBufferPtr)
    252 
              * Prototype bound to 'xmlCharEncFirstLine'.  Returns an xmlCint.
              *   handler   xmlCharEncodingHandler structure, by reference
              *   out       xmlBufferPtr, passed by value
              *   in        xmlBufferPtr, passed by value
              * xmlBufferPtr is not declared in this member (presumably it
              * comes from the libxmlrpg/tree include -- confirm).
    253      d xmlCharEncFirstLine...
    254      d                 pr                  extproc('xmlCharEncFirstLine')
    255      d                                     like(xmlCint)
    256      d  handler                            likeds(xmlCharEncodingHandler)
    257      d  out                                value like(xmlBufferPtr)
    258      d  in                                 value like(xmlBufferPtr)
    259 
              * Prototype bound to 'xmlCharEncCloseFunc'.  Returns an xmlCint.
              *   handler   xmlCharEncodingHandler structure, by reference
    260      d xmlCharEncCloseFunc...
    261      d                 pr                  extproc('xmlCharEncCloseFunc')
    262      d                                     like(xmlCint)
    263      d  handler                            likeds(xmlCharEncodingHandler)
    264 
    265       * Export a few useful functions
    266 
              * Prototype bound to 'UTF8Toisolat1'; declared only when
              * LIBXML_OUTPUT_ENABLED is defined.  Returns an xmlCint.
              *   out      character field by reference, declared length 65535;
              *            options(*varsize) accepts fields of other lengths
              *   outlen   xmlCint, by reference
              *   in       pointer by value, options(*string)
              *            (const unsigned char * per the margin note)
              *   inlen    xmlCint, by reference
    267       /if defined(LIBXML_OUTPUT_ENABLED)
    268      d UTF8Toisolat1   pr                  extproc('UTF8Toisolat1')
    269      d                                     like(xmlCint)
    270      d  out                       65535    options(*varsize)                    unsigned char (*)
    271      d  outlen                             like(xmlCint)
    272      d  in                             *   value options(*string)               const unsigned char*
    273      d  inlen                              like(xmlCint)
    274 
    275       /endif                                                                    LIBXML_OUTPUT_ENABLD
    276 
              * Prototype bound to 'isolat1ToUTF8'.  Returns an xmlCint.
              *   out      character field by reference, declared length 65535;
              *            options(*varsize) accepts fields of other lengths
              *   outlen   xmlCint, by reference
              *   in       pointer by value, options(*string)
              *            (const unsigned char * per the margin note)
              *   inlen    xmlCint, by reference
    277      d isolat1ToUTF8   pr                  extproc('isolat1ToUTF8')
    278      d                                     like(xmlCint)
    279      d  out                       65535    options(*varsize)                    unsigned char (*)
    280      d  outlen                             like(xmlCint)
    281      d  in                             *   value options(*string)               const unsigned char*
    282      d  inlen                              like(xmlCint)
    283 
    284       /endif                                                                    XML_CHAR_ENCODING_H
    285