Home | History | Annotate | Download | only in utils
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one
      3  * or more contributor license agreements. See the NOTICE file
      4  * distributed with this work for additional information
      5  * regarding copyright ownership. The ASF licenses this file
      6  * to you under the Apache License, Version 2.0 (the  "License");
      7  * you may not use this file except in compliance with the License.
      8  * You may obtain a copy of the License at
      9  *
     10  *     http://www.apache.org/licenses/LICENSE-2.0
     11  *
     12  * Unless required by applicable law or agreed to in writing, software
     13  * distributed under the License is distributed on an "AS IS" BASIS,
     14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15  * See the License for the specific language governing permissions and
     16  * limitations under the License.
     17  */
     18 /*
     19  * $Id$
     20  */
     21 
     22 package org.apache.xml.utils;
     23 
     24 /**
     25  * This class defines the basic XML character properties. The data
     26  * in this class can be used to verify that a character is a valid
     27  * XML character or if the character is a space, name start, or name
     28  * character.
     29  * <p>
     30  * A series of convenience methods are supplied to ease the burden
     31  * of the developer. Because inlining the checks can improve per
     32  * character performance, the tables of character properties are
     33  * public. Using the character as an index into the <code>CHARS</code>
     34  * array and applying the appropriate mask flag (e.g.
     35  * <code>MASK_VALID</code>), yields the same results as calling the
     36  * convenience methods. There is one exception: check the comments
     37  * for the <code>isValid</code> method for details.
     38  *
     39  * @author Glenn Marcy, IBM
     40  * @author Andy Clark, IBM
     41  * @author Eric Ye, IBM
     42  * @author Arnaud  Le Hors, IBM
     43  * @author Rahul Srivastava, Sun Microsystems Inc.
     44  *
     45  * @version $Id: XMLChar.java,v 1.7 2002/01/29 01:15:18 lehors Exp $
     46  */
     47 public class XMLChar {
     48 
     49     //
     50     // Constants
     51     //
     52 
     53     /** Character flags. */
     54     private static final byte[] CHARS = new byte[1 << 16];
     55 
     56     /** Valid character mask. */
     57     public static final int MASK_VALID = 0x01;
     58 
     59     /** Space character mask. */
     60     public static final int MASK_SPACE = 0x02;
     61 
     62     /** Name start character mask. */
     63     public static final int MASK_NAME_START = 0x04;
     64 
     65     /** Name character mask. */
     66     public static final int MASK_NAME = 0x08;
     67 
     68     /** Pubid character mask. */
     69     public static final int MASK_PUBID = 0x10;
     70 
     71     /**
     72      * Content character mask. Special characters are those that can
     73      * be considered the start of markup, such as '&lt;' and '&amp;'.
     74      * The various newline characters are considered special as well.
     75      * All other valid XML characters can be considered content.
     76      * <p>
     77      * This is an optimization for the inner loop of character scanning.
     78      */
     79     public static final int MASK_CONTENT = 0x20;
     80 
     81     /** NCName start character mask. */
     82     public static final int MASK_NCNAME_START = 0x40;
     83 
     84     /** NCName character mask. */
     85     public static final int MASK_NCNAME = 0x80;
     86 
     87     //
     88     // Static initialization
     89     //
     90 
     91     static {
     92 
     93         //
     94         // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] |
     95         //              [#xE000-#xFFFD] | [#x10000-#x10FFFF]
     96         //
     97 
     98         int charRange[] = {
     99             0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD,
    100         };
    101 
    102         //
    103         // [3] S ::= (#x20 | #x9 | #xD | #xA)+
    104         //
    105 
    106         int spaceChar[] = {
    107             0x0020, 0x0009, 0x000D, 0x000A,
    108         };
    109 
    110         //
    111         // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
    112         //                  CombiningChar | Extender
    113         //
    114 
    115         int nameChar[] = {
    116             0x002D, 0x002E, // '-' and '.'
    117         };
    118 
    119         //
    120         // [5] Name ::= (Letter | '_' | ':') (NameChar)*
    121         //
    122 
    123         int nameStartChar[] = {
    124             0x003A, 0x005F, // ':' and '_'
    125         };
    126 
    127         //
    128         // [13] PubidChar ::= #x20 | 0xD | 0xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
    129         //
    130 
    131         int pubidChar[] = {
    132             0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D,
    133             0x005F
    134         };
    135 
    136         int pubidRange[] = {
    137             0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A
    138         };
    139 
    140         //
    141         // [84] Letter ::= BaseChar | Ideographic
    142         //
    143 
    144         int letterRange[] = {
    145             // BaseChar
    146             0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
    147             0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
    148             0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
    149             0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
    150             0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
    151             0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
    152             0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
    153             0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
    154             0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
    155             0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
    156             0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
    157             0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
    158             0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
    159             0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
    160             0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
    161             0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
    162             0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
    163             0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
    164             0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
    165             0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
    166             0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
    167             0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
    168             0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
    169             0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
    170             0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
    171             0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
    172             0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
    173             0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
    174             0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
    175             0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
    176             0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
    177             0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
    178             0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
    179             0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
    180             0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
    181             0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
    182             0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
    183             0xAC00, 0xD7A3,
    184             // Ideographic
    185             0x3021, 0x3029, 0x4E00, 0x9FA5,
    186         };
    187         int letterChar[] = {
    188             // BaseChar
    189             0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
    190             0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
    191             0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
    192             0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
    193             0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
    194             0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
    195             0x1F5D, 0x1FBE, 0x2126, 0x212E,
    196             // Ideographic
    197             0x3007,
    198         };
    199 
    200         //
    201         // [87] CombiningChar ::= ...
    202         //
    203 
    204         int combiningCharRange[] = {
    205             0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
    206             0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
    207             0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
    208             0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
    209             0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
    210             0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
    211             0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
    212             0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
    213             0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
    214             0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
    215             0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
    216             0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
    217             0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
    218             0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
    219             0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
    220             0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
    221             0x20D0, 0x20DC, 0x302A, 0x302F,
    222         };
    223 
    224         int combiningCharChar[] = {
    225             0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
    226             0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
    227             0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
    228             0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
    229         };
    230 
    231         //
    232         // [88] Digit ::= ...
    233         //
    234 
    235         int digitRange[] = {
    236             0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
    237             0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
    238             0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
    239             0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
    240         };
    241 
    242         //
    243         // [89] Extender ::= ...
    244         //
    245 
    246         int extenderRange[] = {
    247             0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
    248         };
    249 
    250         int extenderChar[] = {
    251             0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
    252         };
    253 
    254         //
    255         // SpecialChar ::= '<', '&', '\n', '\r', ']'
    256         //
    257 
    258         int specialChar[] = {
    259             '<', '&', '\n', '\r', ']',
    260         };
    261 
    262         //
    263         // Initialize
    264         //
    265 
    266         // set valid characters
    267         for (int i = 0; i < charRange.length; i += 2) {
    268             for (int j = charRange[i]; j <= charRange[i + 1]; j++) {
    269                 CHARS[j] |= MASK_VALID | MASK_CONTENT;
    270             }
    271         }
    272 
    273         // remove special characters
    274         for (int i = 0; i < specialChar.length; i++) {
    275             CHARS[specialChar[i]] = (byte)(CHARS[specialChar[i]] & ~MASK_CONTENT);
    276         }
    277 
    278         // set space characters
    279         for (int i = 0; i < spaceChar.length; i++) {
    280             CHARS[spaceChar[i]] |= MASK_SPACE;
    281         }
    282 
    283         // set name start characters
    284         for (int i = 0; i < nameStartChar.length; i++) {
    285             CHARS[nameStartChar[i]] |= MASK_NAME_START | MASK_NAME |
    286                                        MASK_NCNAME_START | MASK_NCNAME;
    287         }
    288         for (int i = 0; i < letterRange.length; i += 2) {
    289             for (int j = letterRange[i]; j <= letterRange[i + 1]; j++) {
    290                 CHARS[j] |= MASK_NAME_START | MASK_NAME |
    291                             MASK_NCNAME_START | MASK_NCNAME;
    292             }
    293         }
    294         for (int i = 0; i < letterChar.length; i++) {
    295             CHARS[letterChar[i]] |= MASK_NAME_START | MASK_NAME |
    296                                     MASK_NCNAME_START | MASK_NCNAME;
    297         }
    298 
    299         // set name characters
    300         for (int i = 0; i < nameChar.length; i++) {
    301             CHARS[nameChar[i]] |= MASK_NAME | MASK_NCNAME;
    302         }
    303         for (int i = 0; i < digitRange.length; i += 2) {
    304             for (int j = digitRange[i]; j <= digitRange[i + 1]; j++) {
    305                 CHARS[j] |= MASK_NAME | MASK_NCNAME;
    306             }
    307         }
    308         for (int i = 0; i < combiningCharRange.length; i += 2) {
    309             for (int j = combiningCharRange[i]; j <= combiningCharRange[i + 1]; j++) {
    310                 CHARS[j] |= MASK_NAME | MASK_NCNAME;
    311             }
    312         }
    313         for (int i = 0; i < combiningCharChar.length; i++) {
    314             CHARS[combiningCharChar[i]] |= MASK_NAME | MASK_NCNAME;
    315         }
    316         for (int i = 0; i < extenderRange.length; i += 2) {
    317             for (int j = extenderRange[i]; j <= extenderRange[i + 1]; j++) {
    318                 CHARS[j] |= MASK_NAME | MASK_NCNAME;
    319             }
    320         }
    321         for (int i = 0; i < extenderChar.length; i++) {
    322             CHARS[extenderChar[i]] |= MASK_NAME | MASK_NCNAME;
    323         }
    324 
    325         // remove ':' from allowable MASK_NCNAME_START and MASK_NCNAME chars
    326         CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME);
    327 
    328         // set Pubid characters
    329         for (int i = 0; i < pubidChar.length; i++) {
    330             CHARS[pubidChar[i]] |= MASK_PUBID;
    331         }
    332         for (int i = 0; i < pubidRange.length; i += 2) {
    333             for (int j = pubidRange[i]; j <= pubidRange[i + 1]; j++) {
    334                 CHARS[j] |= MASK_PUBID;
    335             }
    336         }
    337 
    338     } // <clinit>()
    339 
    340     //
    341     // Public static methods
    342     //
    343 
    344     /**
    345      * Returns true if the specified character is a supplemental character.
    346      *
    347      * @param c The character to check.
    348      */
    349     public static boolean isSupplemental(int c) {
    350         return (c >= 0x10000 && c <= 0x10FFFF);
    351     }
    352 
    353     /**
    354      * Returns true the supplemental character corresponding to the given
    355      * surrogates.
    356      *
    357      * @param h The high surrogate.
    358      * @param l The low surrogate.
    359      */
    360     public static int supplemental(char h, char l) {
    361         return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000;
    362     }
    363 
    364     /**
    365      * Returns the high surrogate of a supplemental character
    366      *
    367      * @param c The supplemental character to "split".
    368      */
    369     public static char highSurrogate(int c) {
    370         return (char) (((c - 0x00010000) >> 10) + 0xD800);
    371     }
    372 
    373     /**
    374      * Returns the low surrogate of a supplemental character
    375      *
    376      * @param c The supplemental character to "split".
    377      */
    378     public static char lowSurrogate(int c) {
    379         return (char) (((c - 0x00010000) & 0x3FF) + 0xDC00);
    380     }
    381 
    382     /**
    383      * Returns whether the given character is a high surrogate
    384      *
    385      * @param c The character to check.
    386      */
    387     public static boolean isHighSurrogate(int c) {
    388         return (0xD800 <= c && c <= 0xDBFF);
    389     }
    390 
    391     /**
    392      * Returns whether the given character is a low surrogate
    393      *
    394      * @param c The character to check.
    395      */
    396     public static boolean isLowSurrogate(int c) {
    397         return (0xDC00 <= c && c <= 0xDFFF);
    398     }
    399 
    400 
    401     /**
    402      * Returns true if the specified character is valid. This method
    403      * also checks the surrogate character range from 0x10000 to 0x10FFFF.
    404      * <p>
    405      * If the program chooses to apply the mask directly to the
    406      * <code>CHARS</code> array, then they are responsible for checking
    407      * the surrogate character range.
    408      *
    409      * @param c The character to check.
    410      */
    411     public static boolean isValid(int c) {
    412         return (c < 0x10000 && (CHARS[c] & MASK_VALID) != 0) ||
    413                (0x10000 <= c && c <= 0x10FFFF);
    414     } // isValid(int):boolean
    415 
    416     /**
    417      * Returns true if the specified character is invalid.
    418      *
    419      * @param c The character to check.
    420      */
    421     public static boolean isInvalid(int c) {
    422         return !isValid(c);
    423     } // isInvalid(int):boolean
    424 
    425     /**
    426      * Returns true if the specified character can be considered content.
    427      *
    428      * @param c The character to check.
    429      */
    430     public static boolean isContent(int c) {
    431         return (c < 0x10000 && (CHARS[c] & MASK_CONTENT) != 0) ||
    432                (0x10000 <= c && c <= 0x10FFFF);
    433     } // isContent(int):boolean
    434 
    435     /**
    436      * Returns true if the specified character can be considered markup.
    437      * Markup characters include '&lt;', '&amp;', and '%'.
    438      *
    439      * @param c The character to check.
    440      */
    441     public static boolean isMarkup(int c) {
    442         return c == '<' || c == '&' || c == '%';
    443     } // isMarkup(int):boolean
    444 
    445     /**
    446      * Returns true if the specified character is a space character
    447      * as defined by production [3] in the XML 1.0 specification.
    448      *
    449      * @param c The character to check.
    450      */
    451     public static boolean isSpace(int c) {
    452         return c < 0x10000 && (CHARS[c] & MASK_SPACE) != 0;
    453     } // isSpace(int):boolean
    454 
    455     /**
    456      * Returns true if the specified character is a valid name start
    457      * character as defined by production [5] in the XML 1.0
    458      * specification.
    459      *
    460      * @param c The character to check.
    461      */
    462     public static boolean isNameStart(int c) {
    463         return c < 0x10000 && (CHARS[c] & MASK_NAME_START) != 0;
    464     } // isNameStart(int):boolean
    465 
    466     /**
    467      * Returns true if the specified character is a valid name
    468      * character as defined by production [4] in the XML 1.0
    469      * specification.
    470      *
    471      * @param c The character to check.
    472      */
    473     public static boolean isName(int c) {
    474         return c < 0x10000 && (CHARS[c] & MASK_NAME) != 0;
    475     } // isName(int):boolean
    476 
    477     /**
    478      * Returns true if the specified character is a valid NCName start
    479      * character as defined by production [4] in Namespaces in XML
    480      * recommendation.
    481      *
    482      * @param c The character to check.
    483      */
    484     public static boolean isNCNameStart(int c) {
    485         return c < 0x10000 && (CHARS[c] & MASK_NCNAME_START) != 0;
    486     } // isNCNameStart(int):boolean
    487 
    488     /**
    489      * Returns true if the specified character is a valid NCName
    490      * character as defined by production [5] in Namespaces in XML
    491      * recommendation.
    492      *
    493      * @param c The character to check.
    494      */
    495     public static boolean isNCName(int c) {
    496         return c < 0x10000 && (CHARS[c] & MASK_NCNAME) != 0;
    497     } // isNCName(int):boolean
    498 
    499     /**
    500      * Returns true if the specified character is a valid Pubid
    501      * character as defined by production [13] in the XML 1.0
    502      * specification.
    503      *
    504      * @param c The character to check.
    505      */
    506     public static boolean isPubid(int c) {
    507         return c < 0x10000 && (CHARS[c] & MASK_PUBID) != 0;
    508     } // isPubid(int):boolean
    509 
    510     /*
    511      * [5] Name ::= (Letter | '_' | ':') (NameChar)*
    512      */
    513     /**
    514      * Check to see if a string is a valid Name according to [5]
    515      * in the XML 1.0 Recommendation
    516      *
    517      * @param name string to check
    518      * @return true if name is a valid Name
    519      */
    520     public static boolean isValidName(String name) {
    521         if (name.length() == 0)
    522             return false;
    523         char ch = name.charAt(0);
    524         if( isNameStart(ch) == false)
    525            return false;
    526         for (int i = 1; i < name.length(); i++ ) {
    527            ch = name.charAt(i);
    528            if( isName( ch ) == false ){
    529               return false;
    530            }
    531         }
    532         return true;
    533     } // isValidName(String):boolean
    534 
    535 
    536     /*
    537      * from the namespace rec
    538      * [4] NCName ::= (Letter | '_') (NCNameChar)*
    539      */
    540     /**
    541      * Check to see if a string is a valid NCName according to [4]
    542      * from the XML Namespaces 1.0 Recommendation
    543      *
    544      * @param ncName string to check
    545      * @return true if name is a valid NCName
    546      */
    547     public static boolean isValidNCName(String ncName) {
    548         if (ncName.length() == 0)
    549             return false;
    550         char ch = ncName.charAt(0);
    551         if( isNCNameStart(ch) == false)
    552            return false;
    553         for (int i = 1; i < ncName.length(); i++ ) {
    554            ch = ncName.charAt(i);
    555            if( isNCName( ch ) == false ){
    556               return false;
    557            }
    558         }
    559         return true;
    560     } // isValidNCName(String):boolean
    561 
    562     /*
    563      * [7] Nmtoken ::= (NameChar)+
    564      */
    565     /**
    566      * Check to see if a string is a valid Nmtoken according to [7]
    567      * in the XML 1.0 Recommendation
    568      *
    569      * @param nmtoken string to check
    570      * @return true if nmtoken is a valid Nmtoken
    571      */
    572     public static boolean isValidNmtoken(String nmtoken) {
    573         if (nmtoken.length() == 0)
    574             return false;
    575         for (int i = 0; i < nmtoken.length(); i++ ) {
    576            char ch = nmtoken.charAt(i);
    577            if(  ! isName( ch ) ){
    578               return false;
    579            }
    580         }
    581         return true;
    582     } // isValidName(String):boolean
    583 
    584 
    585 
    586 
    587 
    588     // encodings
    589 
    590     /**
    591      * Returns true if the encoding name is a valid IANA encoding.
    592      * This method does not verify that there is a decoder available
    593      * for this encoding, only that the characters are valid for an
    594      * IANA encoding name.
    595      *
    596      * @param ianaEncoding The IANA encoding name.
    597      */
    598     public static boolean isValidIANAEncoding(String ianaEncoding) {
    599         if (ianaEncoding != null) {
    600             int length = ianaEncoding.length();
    601             if (length > 0) {
    602                 char c = ianaEncoding.charAt(0);
    603                 if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
    604                     for (int i = 1; i < length; i++) {
    605                         c = ianaEncoding.charAt(i);
    606                         if ((c < 'A' || c > 'Z') && (c < 'a' || c > 'z') &&
    607                             (c < '0' || c > '9') && c != '.' && c != '_' &&
    608                             c != '-') {
    609                             return false;
    610                         }
    611                     }
    612                     return true;
    613                 }
    614             }
    615         }
    616         return false;
    617     } // isValidIANAEncoding(String):boolean
    618 
    619     /**
    620      * Returns true if the encoding name is a valid Java encoding.
    621      * This method does not verify that there is a decoder available
    622      * for this encoding, only that the characters are valid for an
    623      * Java encoding name.
    624      *
    625      * @param javaEncoding The Java encoding name.
    626      */
    627     public static boolean isValidJavaEncoding(String javaEncoding) {
    628         if (javaEncoding != null) {
    629             int length = javaEncoding.length();
    630             if (length > 0) {
    631                 for (int i = 1; i < length; i++) {
    632                     char c = javaEncoding.charAt(i);
    633                     if ((c < 'A' || c > 'Z') && (c < 'a' || c > 'z') &&
    634                         (c < '0' || c > '9') && c != '.' && c != '_' &&
    635                         c != '-') {
    636                         return false;
    637                     }
    638                 }
    639                 return true;
    640             }
    641         }
    642         return false;
    643     } // isValidIANAEncoding(String):boolean
    644 
    645    /**
    646      * Simple check to determine if qname is legal. If it returns false
    647      * then <param>str</param> is illegal; if it returns true then
    648      * <param>str</param> is legal.
    649      */
    650     public static boolean isValidQName(String str) {
    651 
    652        final int colon = str.indexOf(':');
    653 
    654        if (colon == 0 || colon == str.length() - 1) {
    655            return false;
    656        }
    657 
    658        if (colon > 0) {
    659            final String prefix = str.substring(0,colon);
    660            final String localPart = str.substring(colon+1);
    661            return isValidNCName(prefix) && isValidNCName(localPart);
    662        }
    663        else {
    664            return isValidNCName(str);
    665        }
    666     }
    667 
    668 } // class XMLChar
    669