Home | History | Annotate | Download | only in dom
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package org.apache.harmony.xml.dom;
     18 
     19 import java.util.Map;
     20 import java.util.TreeMap;
     21 import org.w3c.dom.DOMConfiguration;
     22 import org.w3c.dom.DOMError;
     23 import org.w3c.dom.DOMErrorHandler;
     24 import org.w3c.dom.DOMException;
     25 import org.w3c.dom.DOMStringList;
     26 import org.w3c.dom.NamedNodeMap;
     27 import org.w3c.dom.Node;
     28 
     29 /**
     30  * A minimal implementation of DOMConfiguration. This implementation uses inner
     31  * parameter instances to centralize each parameter's behavior.
     32  */
     33 public final class DOMConfigurationImpl implements DOMConfiguration {
     34 
     35     private static final Map<String, Parameter> PARAMETERS
     36             = new TreeMap<String, Parameter>(String.CASE_INSENSITIVE_ORDER);
     37 
     38     static {
     39         /*
     40          * True to canonicalize the document (unsupported). This includes
     41          * removing DocumentType nodes from the tree and removing unused
     42          * namespace declarations. Setting this to true also sets these
     43          * parameters:
     44          *   entities = false
     45          *   normalize-characters = false
     46          *   cdata-sections = false
     47          *   namespaces = true
     48          *   namespace-declarations = true
     49          *   well-formed = true
     50          *   element-content-whitespace = true
     51          * Setting these parameters to another value shall revert the canonical
     52          * form to false.
     53          */
     54         PARAMETERS.put("canonical-form", new FixedParameter(false));
     55 
     56         /*
     57          * True to keep existing CDATA nodes; false to replace them/merge them
     58          * into adjacent text nodes.
     59          */
     60         PARAMETERS.put("cdata-sections", new BooleanParameter() {
     61             public Object get(DOMConfigurationImpl config) {
     62                 return config.cdataSections;
     63             }
     64             public void set(DOMConfigurationImpl config, Object value) {
     65                 config.cdataSections = (Boolean) value;
     66             }
     67         });
     68 
     69         /*
     70          * True to check character normalization (unsupported).
     71          */
     72         PARAMETERS.put("check-character-normalization", new FixedParameter(false));
     73 
     74         /*
     75          * True to keep comments in the document; false to discard them.
     76          */
     77         PARAMETERS.put("comments", new BooleanParameter() {
     78             public Object get(DOMConfigurationImpl config) {
     79                 return config.comments;
     80             }
     81             public void set(DOMConfigurationImpl config, Object value) {
     82                 config.comments = (Boolean) value;
     83             }
     84         });
     85 
     86         /*
     87          * True to expose schema normalized values. Setting this to true sets
     88          * the validate parameter to true. Has no effect when validate is false.
     89          */
     90         PARAMETERS.put("datatype-normalization", new BooleanParameter() {
     91             public Object get(DOMConfigurationImpl config) {
     92                 return config.datatypeNormalization;
     93             }
     94             public void set(DOMConfigurationImpl config, Object value) {
     95                 if ((Boolean) value) {
     96                     config.datatypeNormalization = true;
     97                     config.validate = true;
     98                 } else {
     99                     config.datatypeNormalization = false;
    100                 }
    101             }
    102         });
    103 
    104         /*
    105          * True to keep whitespace elements in the document; false to discard
    106          * them (unsupported).
    107          */
    108         PARAMETERS.put("element-content-whitespace", new FixedParameter(true));
    109 
    110         /*
    111          * True to keep entity references in the document; false to expand them.
    112          */
    113         PARAMETERS.put("entities", new BooleanParameter() {
    114             public Object get(DOMConfigurationImpl config) {
    115                 return config.entities;
    116             }
    117             public void set(DOMConfigurationImpl config, Object value) {
    118                 config.entities = (Boolean) value;
    119             }
    120         });
    121 
    122         /*
    123          * Handler to be invoked when errors are encountered.
    124          */
    125         PARAMETERS.put("error-handler", new Parameter() {
    126             public Object get(DOMConfigurationImpl config) {
    127                 return config.errorHandler;
    128             }
    129             public void set(DOMConfigurationImpl config, Object value) {
    130                 config.errorHandler = (DOMErrorHandler) value;
    131             }
    132             public boolean canSet(DOMConfigurationImpl config, Object value) {
    133                 return value == null || value instanceof DOMErrorHandler;
    134             }
    135         });
    136 
    137         /*
    138          * Bulk alias to set the following parameter values:
    139          *   validate-if-schema = false
    140          *   entities = false
    141          *   datatype-normalization = false
    142          *   cdata-sections = false
    143          *   namespace-declarations = true
    144          *   well-formed = true
    145          *   element-content-whitespace = true
    146          *   comments = true
    147          *   namespaces = true.
    148          * Querying this returns true if all of the above parameters have the
    149          * listed values; false otherwise.
    150          */
    151         PARAMETERS.put("infoset", new BooleanParameter() {
    152             public Object get(DOMConfigurationImpl config) {
    153                 // validate-if-schema is always false
    154                 // element-content-whitespace is always true
    155                 // namespace-declarations is always true
    156                 return !config.entities
    157                         && !config.datatypeNormalization
    158                         && !config.cdataSections
    159                         && config.wellFormed
    160                         && config.comments
    161                         && config.namespaces;
    162             }
    163             public void set(DOMConfigurationImpl config, Object value) {
    164                 if ((Boolean) value) {
    165                     // validate-if-schema is always false
    166                     // element-content-whitespace is always true
    167                     // namespace-declarations is always true
    168                     config.entities = false;
    169                     config.datatypeNormalization = false;
    170                     config.cdataSections = false;
    171                     config.wellFormed = true;
    172                     config.comments = true;
    173                     config.namespaces = true;
    174                 }
    175             }
    176         });
    177 
    178         /*
    179          * True to perform namespace processing; false for none.
    180          */
    181         PARAMETERS.put("namespaces", new BooleanParameter() {
    182             public Object get(DOMConfigurationImpl config) {
    183                 return config.namespaces;
    184             }
    185             public void set(DOMConfigurationImpl config, Object value) {
    186                 config.namespaces = (Boolean) value;
    187             }
    188         });
    189 
    190         /**
    191          * True to include namespace declarations; false to discard them
    192          * (unsupported). Even when namespace declarations are discarded,
    193          * prefixes are retained.
    194          *
    195          * Has no effect if namespaces is false.
    196          */
    197         PARAMETERS.put("namespace-declarations", new FixedParameter(true));
    198 
    199         /*
    200          * True to fully normalize characters (unsupported).
    201          */
    202         PARAMETERS.put("normalize-characters", new FixedParameter(false));
    203 
    204         /*
    205          * A list of whitespace-separated URIs representing the schemas to validate
    206          * against. Has no effect if schema-type is null.
    207          */
    208         PARAMETERS.put("schema-location", new Parameter() {
    209             public Object get(DOMConfigurationImpl config) {
    210                 return config.schemaLocation;
    211             }
    212             public void set(DOMConfigurationImpl config, Object value) {
    213                 config.schemaLocation = (String) value;
    214             }
    215             public boolean canSet(DOMConfigurationImpl config, Object value) {
    216                 return value == null || value instanceof String;
    217             }
    218         });
    219 
    220         /*
    221          * URI representing the type of schema language, such as
    222          * "http://www.w3.org/2001/XMLSchema" or "http://www.w3.org/TR/REC-xml".
    223          */
    224         PARAMETERS.put("schema-type", new Parameter() {
    225             public Object get(DOMConfigurationImpl config) {
    226                 return config.schemaType;
    227             }
    228             public void set(DOMConfigurationImpl config, Object value) {
    229                 config.schemaType = (String) value;
    230             }
    231             public boolean canSet(DOMConfigurationImpl config, Object value) {
    232                 return value == null || value instanceof String;
    233             }
    234         });
    235 
    236         /*
    237          * True to split CDATA sections containing "]]>"; false to signal an
    238          * error instead.
    239          */
    240         PARAMETERS.put("split-cdata-sections", new BooleanParameter() {
    241             public Object get(DOMConfigurationImpl config) {
    242                 return config.splitCdataSections;
    243             }
    244             public void set(DOMConfigurationImpl config, Object value) {
    245                 config.splitCdataSections = (Boolean) value;
    246             }
    247         });
    248 
    249         /*
    250          * True to require validation against a schema or DTD. Validation will
    251          * recompute element content whitespace, ID and schema type data.
    252          *
    253          * Setting this unsets validate-if-schema.
    254          */
    255         PARAMETERS.put("validate", new BooleanParameter() {
    256             public Object get(DOMConfigurationImpl config) {
    257                 return config.validate;
    258             }
    259             public void set(DOMConfigurationImpl config, Object value) {
    260                 // validate-if-schema is always false
    261                 config.validate = (Boolean) value;
    262             }
    263         });
    264 
    265         /*
    266          * True to validate if a schema was declared (unsupported). Setting this
    267          * unsets validate.
    268          */
    269         PARAMETERS.put("validate-if-schema", new FixedParameter(false));
    270 
    271         /*
    272          * True to report invalid characters in node names, attributes, elements,
    273          * comments, text, CDATA sections and processing instructions.
    274          */
    275         PARAMETERS.put("well-formed", new BooleanParameter() {
    276             public Object get(DOMConfigurationImpl config) {
    277                 return config.wellFormed;
    278             }
    279             public void set(DOMConfigurationImpl config, Object value) {
    280                 config.wellFormed = (Boolean) value;
    281             }
    282         });
    283 
    284         // TODO add "resource-resolver" property for use with LS feature...
    285     }
    286 
    287     private boolean cdataSections = true;
    288     private boolean comments = true;
    289     private boolean datatypeNormalization = false;
    290     private boolean entities = true;
    291     private DOMErrorHandler errorHandler;
    292     private boolean namespaces = true;
    293     private String schemaLocation;
    294     private String schemaType;
    295     private boolean splitCdataSections = true;
    296     private boolean validate = false;
    297     private boolean wellFormed = true;
    298 
    299     interface Parameter {
    300         Object get(DOMConfigurationImpl config);
    301         void set(DOMConfigurationImpl config, Object value);
    302         boolean canSet(DOMConfigurationImpl config, Object value);
    303     }
    304 
    305     static class FixedParameter implements Parameter {
    306         final Object onlyValue;
    307         FixedParameter(Object onlyValue) {
    308             this.onlyValue = onlyValue;
    309         }
    310         public Object get(DOMConfigurationImpl config) {
    311             return onlyValue;
    312         }
    313         public void set(DOMConfigurationImpl config, Object value) {
    314             if (!onlyValue.equals(value)) {
    315                 throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
    316                         "Unsupported value: " + value);
    317             }
    318         }
    319         public boolean canSet(DOMConfigurationImpl config, Object value) {
    320             return onlyValue.equals(value);
    321         }
    322     }
    323 
    324     static abstract class BooleanParameter implements Parameter {
    325         public boolean canSet(DOMConfigurationImpl config, Object value) {
    326             return value instanceof Boolean;
    327         }
    328     }
    329 
    330     public boolean canSetParameter(String name, Object value) {
    331         Parameter parameter = PARAMETERS.get(name);
    332         return parameter != null && parameter.canSet(this, value);
    333     }
    334 
    335     public void setParameter(String name, Object value) throws DOMException {
    336         Parameter parameter = PARAMETERS.get(name);
    337         if (parameter == null) {
    338             throw new DOMException(DOMException.NOT_FOUND_ERR, "No such parameter: " + name);
    339         }
    340         try {
    341             parameter.set(this, value);
    342         } catch (NullPointerException e) {
    343             throw new DOMException(DOMException.TYPE_MISMATCH_ERR,
    344                     "Null not allowed for " + name);
    345         } catch (ClassCastException e) {
    346             throw new DOMException(DOMException.TYPE_MISMATCH_ERR,
    347                     "Invalid type for " + name + ": " + value.getClass());
    348         }
    349     }
    350 
    351     public Object getParameter(String name) throws DOMException {
    352         Parameter parameter = PARAMETERS.get(name);
    353         if (parameter == null) {
    354             throw new DOMException(DOMException.NOT_FOUND_ERR, "No such parameter: " + name);
    355         }
    356         return parameter.get(this);
    357     }
    358 
    359     public DOMStringList getParameterNames() {
    360         return internalGetParameterNames();
    361     }
    362 
    363     private static DOMStringList internalGetParameterNames() {
    364         final String[] result = PARAMETERS.keySet().toArray(new String[PARAMETERS.size()]);
    365         return new DOMStringList() {
    366             public String item(int index) {
    367                 return index < result.length ? result[index] : null;
    368             }
    369             public int getLength() {
    370                 return result.length;
    371             }
    372             public boolean contains(String str) {
    373                 return PARAMETERS.containsKey(str); // case-insensitive.
    374             }
    375         };
    376     }
    377 
    378     public void normalize(Node node) {
    379         /*
    380          * Since we don't validate, this code doesn't take into account the
    381          * following "supported" parameters: datatype-normalization, entities,
    382          * schema-location, schema-type, or validate.
    383          *
    384          * TODO: normalize namespaces
    385          */
    386 
    387         switch (node.getNodeType()) {
    388             case Node.CDATA_SECTION_NODE:
    389                 CDATASectionImpl cdata = (CDATASectionImpl) node;
    390                 if (cdataSections) {
    391                     if (cdata.needsSplitting()) {
    392                         if (splitCdataSections) {
    393                             cdata.split();
    394                             report(DOMError.SEVERITY_WARNING, "cdata-sections-splitted");
    395                         } else {
    396                             report(DOMError.SEVERITY_ERROR, "wf-invalid-character");
    397                         }
    398                     }
    399                     checkTextValidity(cdata.buffer);
    400                     break;
    401                 }
    402                 node = cdata.replaceWithText();
    403                 // fall through
    404 
    405             case Node.TEXT_NODE:
    406                 TextImpl text = (TextImpl) node;
    407                 text = text.minimize();
    408                 if (text != null) {
    409                     checkTextValidity(text.buffer);
    410                 }
    411                 break;
    412 
    413             case Node.COMMENT_NODE:
    414                 CommentImpl comment = (CommentImpl) node;
    415                 if (!comments) {
    416                     comment.getParentNode().removeChild(comment);
    417                     break;
    418                 }
    419                 if (comment.containsDashDash()) {
    420                     report(DOMError.SEVERITY_ERROR, "wf-invalid-character");
    421                 }
    422                 checkTextValidity(comment.buffer);
    423                 break;
    424 
    425             case Node.PROCESSING_INSTRUCTION_NODE:
    426                 checkTextValidity(((ProcessingInstructionImpl) node).getData());
    427                 break;
    428 
    429             case Node.ATTRIBUTE_NODE:
    430                 checkTextValidity(((AttrImpl) node).getValue());
    431                 break;
    432 
    433             case Node.ELEMENT_NODE:
    434                 ElementImpl element = (ElementImpl) node;
    435                 NamedNodeMap attributes = element.getAttributes();
    436                 for (int i = 0; i < attributes.getLength(); i++) {
    437                     normalize(attributes.item(i));
    438                 }
    439                 // fall through
    440 
    441             case Node.DOCUMENT_NODE:
    442             case Node.DOCUMENT_FRAGMENT_NODE:
    443                 Node next;
    444                 for (Node child = node.getFirstChild(); child != null; child = next) {
    445                     // lookup next eagerly because normalize() may remove its subject
    446                     next = child.getNextSibling();
    447                     normalize(child);
    448                 }
    449                 break;
    450 
    451             case Node.NOTATION_NODE:
    452             case Node.DOCUMENT_TYPE_NODE:
    453             case Node.ENTITY_NODE:
    454             case Node.ENTITY_REFERENCE_NODE:
    455                 break;
    456 
    457             default:
    458                 throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
    459                         "Unsupported node type " + node.getNodeType());
    460         }
    461     }
    462 
    463     private void checkTextValidity(CharSequence s) {
    464         if (wellFormed && !isValid(s)) {
    465             report(DOMError.SEVERITY_ERROR, "wf-invalid-character");
    466         }
    467     }
    468 
    469     /**
    470      * Returns true if all of the characters in the text are permitted for use
    471      * in XML documents.
    472      */
    473     private boolean isValid(CharSequence text) {
    474         for (int i = 0; i < text.length(); i++) {
    475             char c = text.charAt(i);
    476             // as defined by http://www.w3.org/TR/REC-xml/#charsets.
    477             boolean valid = c == 0x9 || c == 0xA || c == 0xD
    478                     || (c >= 0x20 && c <= 0xd7ff)
    479                     || (c >= 0xe000 && c <= 0xfffd);
    480             if (!valid) {
    481                 return false;
    482             }
    483         }
    484         return true;
    485     }
    486 
    487     private void report(short severity, String type) {
    488         if (errorHandler != null) {
    489             // TODO: abort if handleError returns false
    490             errorHandler.handleError(new DOMErrorImpl(severity, type));
    491         }
    492     }
    493 }
    494