Home | History | Annotate | Download | only in unicode
      1 #!/usr/bin/env python
      2 
      3 """ Compare the output of two codecs.
      4 
      5 (c) Copyright 2005, Marc-Andre Lemburg (mal@lemburg.com).
      6 
      7     Licensed to PSF under a Contributor Agreement.
      8 
      9 """
     10 import sys
     11 
     12 def compare_codecs(encoding1, encoding2):
     13 
     14     print 'Comparing encoding/decoding of   %r and   %r' % (encoding1, encoding2)
     15     mismatch = 0
     16     # Check encoding
     17     for i in range(sys.maxunicode):
     18         u = unichr(i)
     19         try:
     20             c1 = u.encode(encoding1)
     21         except UnicodeError, reason:
     22             c1 = '<undefined>'
     23         try:
     24             c2 = u.encode(encoding2)
     25         except UnicodeError, reason:
     26             c2 = '<undefined>'
     27         if c1 != c2:
     28             print ' * encoding mismatch for 0x%04X: %-14r != %r' % \
     29                   (i, c1, c2)
     30             mismatch += 1
     31     # Check decoding
     32     for i in range(256):
     33         c = chr(i)
     34         try:
     35             u1 = c.decode(encoding1)
     36         except UnicodeError:
     37             u1 = u'<undefined>'
     38         try:
     39             u2 = c.decode(encoding2)
     40         except UnicodeError:
     41             u2 = u'<undefined>'
     42         if u1 != u2:
     43             print ' * decoding mismatch for 0x%04X: %-14r != %r' % \
     44                   (i, u1, u2)
     45             mismatch += 1
     46     if mismatch:
     47         print
     48         print 'Found %i mismatches' % mismatch
     49     else:
     50         print '-> Codecs are identical.'
     51 
     52 if __name__ == '__main__':
     53     compare_codecs(sys.argv[1], sys.argv[2])
     54