Home | History | Annotate | Download | only in sax
      1 """
      2 This module contains the core classes of version 2.0 of SAX for Python.
      3 This file provides only default classes with absolutely minimum
      4 functionality, from which drivers and applications can be subclassed.
      5 
      6 Many of these classes are empty and are included only as documentation
      7 of the interfaces.
      8 
      9 $Id$
     10 """
     11 
     12 version = '2.0beta'
     13 
     14 #============================================================================
     15 #
     16 # HANDLER INTERFACES
     17 #
     18 #============================================================================
     19 
     20 # ===== ERRORHANDLER =====
     21 
     22 class ErrorHandler:
     23     """Basic interface for SAX error handlers.
     24 
     25     If you create an object that implements this interface, then
     26     register the object with your XMLReader, the parser will call the
     27     methods in your object to report all warnings and errors. There
     28     are three levels of errors available: warnings, (possibly)
     29     recoverable errors, and unrecoverable errors. All methods take a
     30     SAXParseException as the only parameter."""
     31 
     32     def error(self, exception):
     33         "Handle a recoverable error."
     34         raise exception
     35 
     36     def fatalError(self, exception):
     37         "Handle a non-recoverable error."
     38         raise exception
     39 
     40     def warning(self, exception):
     41         "Handle a warning."
     42         print exception
     43 
     44 
     45 # ===== CONTENTHANDLER =====
     46 
     47 class ContentHandler:
     48     """Interface for receiving logical document content events.
     49 
     50     This is the main callback interface in SAX, and the one most
     51     important to applications. The order of events in this interface
     52     mirrors the order of the information in the document."""
     53 
     54     def __init__(self):
     55         self._locator = None
     56 
     57     def setDocumentLocator(self, locator):
     58         """Called by the parser to give the application a locator for
     59         locating the origin of document events.
     60 
     61         SAX parsers are strongly encouraged (though not absolutely
     62         required) to supply a locator: if it does so, it must supply
     63         the locator to the application by invoking this method before
     64         invoking any of the other methods in the DocumentHandler
     65         interface.
     66 
     67         The locator allows the application to determine the end
     68         position of any document-related event, even if the parser is
     69         not reporting an error. Typically, the application will use
     70         this information for reporting its own errors (such as
     71         character content that does not match an application's
     72         business rules). The information returned by the locator is
     73         probably not sufficient for use with a search engine.
     74 
     75         Note that the locator will return correct information only
     76         during the invocation of the events in this interface. The
     77         application should not attempt to use it at any other time."""
     78         self._locator = locator
     79 
     80     def startDocument(self):
     81         """Receive notification of the beginning of a document.
     82 
     83         The SAX parser will invoke this method only once, before any
     84         other methods in this interface or in DTDHandler (except for
     85         setDocumentLocator)."""
     86 
     87     def endDocument(self):
     88         """Receive notification of the end of a document.
     89 
     90         The SAX parser will invoke this method only once, and it will
     91         be the last method invoked during the parse. The parser shall
     92         not invoke this method until it has either abandoned parsing
     93         (because of an unrecoverable error) or reached the end of
     94         input."""
     95 
     96     def startPrefixMapping(self, prefix, uri):
     97         """Begin the scope of a prefix-URI Namespace mapping.
     98 
     99         The information from this event is not necessary for normal
    100         Namespace processing: the SAX XML reader will automatically
    101         replace prefixes for element and attribute names when the
    102         http://xml.org/sax/features/namespaces feature is true (the
    103         default).
    104 
    105         There are cases, however, when applications need to use
    106         prefixes in character data or in attribute values, where they
    107         cannot safely be expanded automatically; the
    108         start/endPrefixMapping event supplies the information to the
    109         application to expand prefixes in those contexts itself, if
    110         necessary.
    111 
    112         Note that start/endPrefixMapping events are not guaranteed to
    113         be properly nested relative to each-other: all
    114         startPrefixMapping events will occur before the corresponding
    115         startElement event, and all endPrefixMapping events will occur
    116         after the corresponding endElement event, but their order is
    117         not guaranteed."""
    118 
    119     def endPrefixMapping(self, prefix):
    120         """End the scope of a prefix-URI mapping.
    121 
    122         See startPrefixMapping for details. This event will always
    123         occur after the corresponding endElement event, but the order
    124         of endPrefixMapping events is not otherwise guaranteed."""
    125 
    126     def startElement(self, name, attrs):
    127         """Signals the start of an element in non-namespace mode.
    128 
    129         The name parameter contains the raw XML 1.0 name of the
    130         element type as a string and the attrs parameter holds an
    131         instance of the Attributes class containing the attributes of
    132         the element."""
    133 
    134     def endElement(self, name):
    135         """Signals the end of an element in non-namespace mode.
    136 
    137         The name parameter contains the name of the element type, just
    138         as with the startElement event."""
    139 
    140     def startElementNS(self, name, qname, attrs):
    141         """Signals the start of an element in namespace mode.
    142 
    143         The name parameter contains the name of the element type as a
    144         (uri, localname) tuple, the qname parameter the raw XML 1.0
    145         name used in the source document, and the attrs parameter
    146         holds an instance of the Attributes class containing the
    147         attributes of the element.
    148 
    149         The uri part of the name tuple is None for elements which have
    150         no namespace."""
    151 
    152     def endElementNS(self, name, qname):
    153         """Signals the end of an element in namespace mode.
    154 
    155         The name parameter contains the name of the element type, just
    156         as with the startElementNS event."""
    157 
    158     def characters(self, content):
    159         """Receive notification of character data.
    160 
    161         The Parser will call this method to report each chunk of
    162         character data. SAX parsers may return all contiguous
    163         character data in a single chunk, or they may split it into
    164         several chunks; however, all of the characters in any single
    165         event must come from the same external entity so that the
    166         Locator provides useful information."""
    167 
    168     def ignorableWhitespace(self, whitespace):
    169         """Receive notification of ignorable whitespace in element content.
    170 
    171         Validating Parsers must use this method to report each chunk
    172         of ignorable whitespace (see the W3C XML 1.0 recommendation,
    173         section 2.10): non-validating parsers may also use this method
    174         if they are capable of parsing and using content models.
    175 
    176         SAX parsers may return all contiguous whitespace in a single
    177         chunk, or they may split it into several chunks; however, all
    178         of the characters in any single event must come from the same
    179         external entity, so that the Locator provides useful
    180         information."""
    181 
    182     def processingInstruction(self, target, data):
    183         """Receive notification of a processing instruction.
    184 
    185         The Parser will invoke this method once for each processing
    186         instruction found: note that processing instructions may occur
    187         before or after the main document element.
    188 
    189         A SAX parser should never report an XML declaration (XML 1.0,
    190         section 2.8) or a text declaration (XML 1.0, section 4.3.1)
    191         using this method."""
    192 
    193     def skippedEntity(self, name):
    194         """Receive notification of a skipped entity.
    195 
    196         The Parser will invoke this method once for each entity
    197         skipped. Non-validating processors may skip entities if they
    198         have not seen the declarations (because, for example, the
    199         entity was declared in an external DTD subset). All processors
    200         may skip external entities, depending on the values of the
    201         http://xml.org/sax/features/external-general-entities and the
    202         http://xml.org/sax/features/external-parameter-entities
    203         properties."""
    204 
    205 
    206 # ===== DTDHandler =====
    207 
    208 class DTDHandler:
    209     """Handle DTD events.
    210 
    211     This interface specifies only those DTD events required for basic
    212     parsing (unparsed entities and attributes)."""
    213 
    214     def notationDecl(self, name, publicId, systemId):
    215         "Handle a notation declaration event."
    216 
    217     def unparsedEntityDecl(self, name, publicId, systemId, ndata):
    218         "Handle an unparsed entity declaration event."
    219 
    220 
    221 # ===== ENTITYRESOLVER =====
    222 
    223 class EntityResolver:
    224     """Basic interface for resolving entities. If you create an object
    225     implementing this interface, then register the object with your
    226     Parser, the parser will call the method in your object to
    227     resolve all external entities. Note that DefaultHandler implements
    228     this interface with the default behaviour."""
    229 
    230     def resolveEntity(self, publicId, systemId):
    231         """Resolve the system identifier of an entity and return either
    232         the system identifier to read from as a string, or an InputSource
    233         to read from."""
    234         return systemId
    235 
    236 
    237 #============================================================================
    238 #
    239 # CORE FEATURES
    240 #
    241 #============================================================================
    242 
    243 feature_namespaces = "http://xml.org/sax/features/namespaces"
    244 # true: Perform Namespace processing (default).
    245 # false: Optionally do not perform Namespace processing
    246 #        (implies namespace-prefixes).
    247 # access: (parsing) read-only; (not parsing) read/write
    248 
    249 feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes"
    250 # true: Report the original prefixed names and attributes used for Namespace
    251 #       declarations.
    252 # false: Do not report attributes used for Namespace declarations, and
    253 #        optionally do not report original prefixed names (default).
    254 # access: (parsing) read-only; (not parsing) read/write
    255 
    256 feature_string_interning = "http://xml.org/sax/features/string-interning"
    257 # true: All element names, prefixes, attribute names, Namespace URIs, and
    258 #       local names are interned using the built-in intern function.
    259 # false: Names are not necessarily interned, although they may be (default).
    260 # access: (parsing) read-only; (not parsing) read/write
    261 
    262 feature_validation = "http://xml.org/sax/features/validation"
    263 # true: Report all validation errors (implies external-general-entities and
    264 #       external-parameter-entities).
    265 # false: Do not report validation errors.
    266 # access: (parsing) read-only; (not parsing) read/write
    267 
    268 feature_external_ges = "http://xml.org/sax/features/external-general-entities"
    269 # true: Include all external general (text) entities.
    270 # false: Do not include external general entities.
    271 # access: (parsing) read-only; (not parsing) read/write
    272 
    273 feature_external_pes = "http://xml.org/sax/features/external-parameter-entities"
    274 # true: Include all external parameter entities, including the external
    275 #       DTD subset.
    276 # false: Do not include any external parameter entities, even the external
    277 #        DTD subset.
    278 # access: (parsing) read-only; (not parsing) read/write
    279 
    280 all_features = [feature_namespaces,
    281                 feature_namespace_prefixes,
    282                 feature_string_interning,
    283                 feature_validation,
    284                 feature_external_ges,
    285                 feature_external_pes]
    286 
    287 
    288 #============================================================================
    289 #
    290 # CORE PROPERTIES
    291 #
    292 #============================================================================
    293 
    294 property_lexical_handler = "http://xml.org/sax/properties/lexical-handler"
    295 # data type: xml.sax.sax2lib.LexicalHandler
    296 # description: An optional extension handler for lexical events like comments.
    297 # access: read/write
    298 
    299 property_declaration_handler = "http://xml.org/sax/properties/declaration-handler"
    300 # data type: xml.sax.sax2lib.DeclHandler
    301 # description: An optional extension handler for DTD-related events other
    302 #              than notations and unparsed entities.
    303 # access: read/write
    304 
    305 property_dom_node = "http://xml.org/sax/properties/dom-node"
    306 # data type: org.w3c.dom.Node
    307 # description: When parsing, the current DOM node being visited if this is
    308 #              a DOM iterator; when not parsing, the root DOM node for
    309 #              iteration.
    310 # access: (parsing) read-only; (not parsing) read/write
    311 
    312 property_xml_string = "http://xml.org/sax/properties/xml-string"
    313 # data type: String
    314 # description: The literal string of characters that was the source for
    315 #              the current event.
    316 # access: read-only
    317 
    318 property_encoding = "http://www.python.org/sax/properties/encoding"
    319 # data type: String
    320 # description: The name of the encoding to assume for input data.
    321 # access: write: set the encoding, e.g. established by a higher-level
    322 #                protocol. May change during parsing (e.g. after
    323 #                processing a META tag)
    324 #         read:  return the current encoding (possibly established through
    325 #                auto-detection.
    326 # initial value: UTF-8
    327 #
    328 
    329 property_interning_dict = "http://www.python.org/sax/properties/interning-dict"
    330 # data type: Dictionary
    331 # description: The dictionary used to intern common strings in the document
    332 # access: write: Request that the parser uses a specific dictionary, to
    333 #                allow interning across different documents
    334 #         read:  return the current interning dictionary, or None
    335 #
    336 
    337 all_properties = [property_lexical_handler,
    338                   property_dom_node,
    339                   property_declaration_handler,
    340                   property_xml_string,
    341                   property_encoding,
    342                   property_interning_dict]
    343