Home | History | Annotate | Download | only in test
      1 from test.support import open_urlresource
      2 import unittest
      3 
      4 from http.client import HTTPException
      5 import sys
      6 from unicodedata import normalize, unidata_version
      7 
      8 TESTDATAFILE = "NormalizationTest.txt"
      9 TESTDATAURL = "http://www.pythontest.net/unicode/" + unidata_version + "/" + TESTDATAFILE
     10 
     11 def check_version(testfile):
     12     hdr = testfile.readline()
     13     return unidata_version in hdr
     14 
     15 class RangeError(Exception):
     16     pass
     17 
     18 def NFC(str):
     19     return normalize("NFC", str)
     20 
     21 def NFKC(str):
     22     return normalize("NFKC", str)
     23 
     24 def NFD(str):
     25     return normalize("NFD", str)
     26 
     27 def NFKD(str):
     28     return normalize("NFKD", str)
     29 
     30 def unistr(data):
     31     data = [int(x, 16) for x in data.split(" ")]
     32     for x in data:
     33         if x > sys.maxunicode:
     34             raise RangeError
     35     return "".join([chr(x) for x in data])
     36 
     37 class NormalizationTest(unittest.TestCase):
     38     def test_main(self):
     39         part = None
     40         part1_data = {}
     41         # Hit the exception early
     42         try:
     43             testdata = open_urlresource(TESTDATAURL, encoding="utf-8",
     44                                         check=check_version)
     45         except (OSError, HTTPException):
     46             self.skipTest("Could not retrieve " + TESTDATAURL)
     47         self.addCleanup(testdata.close)
     48         for line in testdata:
     49             if '#' in line:
     50                 line = line.split('#')[0]
     51             line = line.strip()
     52             if not line:
     53                 continue
     54             if line.startswith("@Part"):
     55                 part = line.split()[0]
     56                 continue
     57             try:
     58                 c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]]
     59             except RangeError:
     60                 # Skip unsupported characters;
     61                 # try at least adding c1 if we are in part1
     62                 if part == "@Part1":
     63                     try:
     64                         c1 = unistr(line.split(';')[0])
     65                     except RangeError:
     66                         pass
     67                     else:
     68                         part1_data[c1] = 1
     69                 continue
     70 
     71             # Perform tests
     72             self.assertTrue(c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3), line)
     73             self.assertTrue(c4 ==  NFC(c4) ==  NFC(c5), line)
     74             self.assertTrue(c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3), line)
     75             self.assertTrue(c5 ==  NFD(c4) ==  NFD(c5), line)
     76             self.assertTrue(c4 == NFKC(c1) == NFKC(c2) == \
     77                             NFKC(c3) == NFKC(c4) == NFKC(c5),
     78                             line)
     79             self.assertTrue(c5 == NFKD(c1) == NFKD(c2) == \
     80                             NFKD(c3) == NFKD(c4) == NFKD(c5),
     81                             line)
     82 
     83             # Record part 1 data
     84             if part == "@Part1":
     85                 part1_data[c1] = 1
     86 
     87         # Perform tests for all other data
     88         for c in range(sys.maxunicode+1):
     89             X = chr(c)
     90             if X in part1_data:
     91                 continue
     92             self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)
     93 
     94     def test_bug_834676(self):
     95         # Check for bug 834676
     96         normalize('NFC', '\ud55c\uae00')
     97 
     98 
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
    101