Home | History | Annotate | Download | only in test
      1 from test.test_support import run_unittest, open_urlresource
      2 import unittest
      3 
      4 from httplib import HTTPException
      5 import sys
      6 import os
      7 from unicodedata import normalize, unidata_version
      8 
      9 TESTDATAFILE = "NormalizationTest.txt"
     10 TESTDATAURL = "http://www.unicode.org/Public/" + unidata_version + "/ucd/" + TESTDATAFILE
     11 
     12 def check_version(testfile):
     13     hdr = testfile.readline()
     14     return unidata_version in hdr
     15 
     16 class RangeError(Exception):
     17     pass
     18 
     19 def NFC(str):
     20     return normalize("NFC", str)
     21 
     22 def NFKC(str):
     23     return normalize("NFKC", str)
     24 
     25 def NFD(str):
     26     return normalize("NFD", str)
     27 
     28 def NFKD(str):
     29     return normalize("NFKD", str)
     30 
     31 def unistr(data):
     32     data = [int(x, 16) for x in data.split(" ")]
     33     for x in data:
     34         if x > sys.maxunicode:
     35             raise RangeError
     36     return u"".join([unichr(x) for x in data])
     37 
     38 class NormalizationTest(unittest.TestCase):
     39     def test_main(self):
     40         part = None
     41         part1_data = {}
     42         # Hit the exception early
     43         try:
     44             testdata = open_urlresource(TESTDATAURL, check_version)
     45         except (IOError, HTTPException):
     46             self.skipTest("Could not retrieve " + TESTDATAURL)
     47         for line in testdata:
     48             if '#' in line:
     49                 line = line.split('#')[0]
     50             line = line.strip()
     51             if not line:
     52                 continue
     53             if line.startswith("@Part"):
     54                 part = line.split()[0]
     55                 continue
     56             try:
     57                 c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]]
     58             except RangeError:
     59                 # Skip unsupported characters;
     60                 # try atleast adding c1 if we are in part1
     61                 if part == "@Part1":
     62                     try:
     63                         c1 = unistr(line.split(';')[0])
     64                     except RangeError:
     65                         pass
     66                     else:
     67                         part1_data[c1] = 1
     68                 continue
     69 
     70             # Perform tests
     71             self.assertTrue(c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3), line)
     72             self.assertTrue(c4 ==  NFC(c4) ==  NFC(c5), line)
     73             self.assertTrue(c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3), line)
     74             self.assertTrue(c5 ==  NFD(c4) ==  NFD(c5), line)
     75             self.assertTrue(c4 == NFKC(c1) == NFKC(c2) == \
     76                             NFKC(c3) == NFKC(c4) == NFKC(c5),
     77                             line)
     78             self.assertTrue(c5 == NFKD(c1) == NFKD(c2) == \
     79                             NFKD(c3) == NFKD(c4) == NFKD(c5),
     80                             line)
     81 
     82             # Record part 1 data
     83             if part == "@Part1":
     84                 part1_data[c1] = 1
     85 
     86         # Perform tests for all other data
     87         for c in range(sys.maxunicode+1):
     88             X = unichr(c)
     89             if X in part1_data:
     90                 continue
     91             self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)
     92 
     93     def test_bug_834676(self):
     94         # Check for bug 834676
     95         normalize('NFC', u'\ud55c\uae00')
     96 
     97 
     98 def test_main():
     99     run_unittest(NormalizationTest)
    100 
    101 if __name__ == "__main__":
    102     test_main()
    103