Home | History | Annotate | Download | only in encodings
      1 """ Python 'utf-16' Codec
      2 
      3 
      4 Written by Marc-Andre Lemburg (mal@lemburg.com).
      5 
      6 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
      7 
      8 """
      9 import codecs, sys
     10 
     11 ### Codec APIs
     12 
# Stateless encode entry point: a direct alias for the codecs module's
# utf_16_encode function, so callers get its (output, length) result as-is.
encode = codecs.utf_16_encode
     14 
     15 def decode(input, errors='strict'):
     16     return codecs.utf_16_decode(input, errors, True)
     17 
     18 class IncrementalEncoder(codecs.IncrementalEncoder):
     19     def __init__(self, errors='strict'):
     20         codecs.IncrementalEncoder.__init__(self, errors)
     21         self.encoder = None
     22 
     23     def encode(self, input, final=False):
     24         if self.encoder is None:
     25             result = codecs.utf_16_encode(input, self.errors)[0]
     26             if sys.byteorder == 'little':
     27                 self.encoder = codecs.utf_16_le_encode
     28             else:
     29                 self.encoder = codecs.utf_16_be_encode
     30             return result
     31         return self.encoder(input, self.errors)[0]
     32 
     33     def reset(self):
     34         codecs.IncrementalEncoder.reset(self)
     35         self.encoder = None
     36 
     37     def getstate(self):
     38         # state info we return to the caller:
     39         # 0: stream is in natural order for this platform
     40         # 2: endianness hasn't been determined yet
     41         # (we're never writing in unnatural order)
     42         return (2 if self.encoder is None else 0)
     43 
     44     def setstate(self, state):
     45         if state:
     46             self.encoder = None
     47         else:
     48             if sys.byteorder == 'little':
     49                 self.encoder = codecs.utf_16_le_encode
     50             else:
     51                 self.encoder = codecs.utf_16_be_encode
     52 
     53 class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
     54     def __init__(self, errors='strict'):
     55         codecs.BufferedIncrementalDecoder.__init__(self, errors)
     56         self.decoder = None
     57 
     58     def _buffer_decode(self, input, errors, final):
     59         if self.decoder is None:
     60             (output, consumed, byteorder) = \
     61                 codecs.utf_16_ex_decode(input, errors, 0, final)
     62             if byteorder == -1:
     63                 self.decoder = codecs.utf_16_le_decode
     64             elif byteorder == 1:
     65                 self.decoder = codecs.utf_16_be_decode
     66             elif consumed >= 2:
     67                 raise UnicodeError("UTF-16 stream does not start with BOM")
     68             return (output, consumed)
     69         return self.decoder(input, self.errors, final)
     70 
     71     def reset(self):
     72         codecs.BufferedIncrementalDecoder.reset(self)
     73         self.decoder = None
     74 
     75 class StreamWriter(codecs.StreamWriter):
     76     def __init__(self, stream, errors='strict'):
     77         codecs.StreamWriter.__init__(self, stream, errors)
     78         self.encoder = None
     79 
     80     def reset(self):
     81         codecs.StreamWriter.reset(self)
     82         self.encoder = None
     83 
     84     def encode(self, input, errors='strict'):
     85         if self.encoder is None:
     86             result = codecs.utf_16_encode(input, errors)
     87             if sys.byteorder == 'little':
     88                 self.encoder = codecs.utf_16_le_encode
     89             else:
     90                 self.encoder = codecs.utf_16_be_encode
     91             return result
     92         else:
     93             return self.encoder(input, errors)
     94 
     95 class StreamReader(codecs.StreamReader):
     96 
     97     def reset(self):
     98         codecs.StreamReader.reset(self)
     99         try:
    100             del self.decode
    101         except AttributeError:
    102             pass
    103 
    104     def decode(self, input, errors='strict'):
    105         (object, consumed, byteorder) = \
    106             codecs.utf_16_ex_decode(input, errors, 0, False)
    107         if byteorder == -1:
    108             self.decode = codecs.utf_16_le_decode
    109         elif byteorder == 1:
    110             self.decode = codecs.utf_16_be_decode
    111         elif consumed>=2:
    112             raise UnicodeError,"UTF-16 stream does not start with BOM"
    113         return (object, consumed)
    114 
    115 ### encodings module API
    116 
    117 def getregentry():
    118     return codecs.CodecInfo(
    119         name='utf-16',
    120         encode=encode,
    121         decode=decode,
    122         incrementalencoder=IncrementalEncoder,
    123         incrementaldecoder=IncrementalDecoder,
    124         streamreader=StreamReader,
    125         streamwriter=StreamWriter,
    126     )
    127