"""Check unicodedata.normalize() against the NormalizationTest.txt data file.

The data file is fetched from TESTDATAURL for the Unicode version reported
by ``unicodedata.unidata_version``.
"""

from test.support import open_urlresource
import unittest

from http.client import HTTPException
import sys
from unicodedata import normalize, unidata_version

TESTDATAFILE = "NormalizationTest.txt"
TESTDATAURL = "http://www.pythontest.net/unicode/" + unidata_version + "/" + TESTDATAFILE


def check_version(testfile):
    """Return True if the first line of *testfile* mentions unidata_version.

    Consumes one line from *testfile* via ``readline()``.
    """
    hdr = testfile.readline()
    return unidata_version in hdr


class RangeError(Exception):
    """Raised by unistr() for a code point greater than sys.maxunicode."""


# NOTE: the parameter name ``str`` shadows the builtin inside these helpers.
# It is kept as-is so that any caller passing it by keyword keeps working.
def NFC(str):
    """Return *str* normalized to form NFC."""
    return normalize("NFC", str)


def NFKC(str):
    """Return *str* normalized to form NFKC."""
    return normalize("NFKC", str)


def NFD(str):
    """Return *str* normalized to form NFD."""
    return normalize("NFD", str)


def NFKD(str):
    """Return *str* normalized to form NFKD."""
    return normalize("NFKD", str)


def unistr(data):
    """Convert space-separated hexadecimal code points into a string.

    *data* is text such as ``"1E0A 0323"``.  Every field is parsed as a
    base-16 integer first; only afterwards is the range checked.

    Raises:
        ValueError: if a field is not a valid hexadecimal number.
        RangeError: if any code point is greater than sys.maxunicode.
    """
    code_points = [int(field, 16) for field in data.split(" ")]
    if any(cp > sys.maxunicode for cp in code_points):
        raise RangeError
    return "".join(map(chr, code_points))


class NormalizationTest(unittest.TestCase):
    """Tests of unicodedata.normalize() driven by NormalizationTest.txt."""

    def test_main(self):
        """Verify every data line, then every code point not listed in Part1.

        The test is skipped when the data file cannot be retrieved.
        """
        part = None
        # First-column strings (c1) of the lines seen while in @Part1; used
        # below to decide which code points must be normalization-invariant.
        part1_data = set()
        # Hit the exception early
        try:
            testdata = open_urlresource(TESTDATAURL, encoding="utf-8",
                                        check=check_version)
        except (OSError, HTTPException):
            self.skipTest("Could not retrieve " + TESTDATAURL)
        self.addCleanup(testdata.close)

        for line in testdata:
            # Drop a trailing comment, if any, and surrounding whitespace.
            if '#' in line:
                line = line.split('#')[0]
            line = line.strip()
            if not line:
                continue
            if line.startswith("@Part"):
                part = line.split()[0]
                continue

            fields = line.split(';')
            try:
                # The element after the last ';' is discarded; the five
                # before it are the columns c1..c5.
                c1, c2, c3, c4, c5 = [unistr(x) for x in fields[:-1]]
            except RangeError:
                # Skip unsupported characters;
                # try at least adding c1 if we are in part1
                if part == "@Part1":
                    try:
                        c1 = unistr(fields[0])
                    except RangeError:
                        pass
                    else:
                        part1_data.add(c1)
                continue

            # Perform tests
            self.assertTrue(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
            self.assertTrue(c4 == NFC(c4) == NFC(c5), line)
            self.assertTrue(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
            self.assertTrue(c5 == NFD(c4) == NFD(c5), line)
            self.assertTrue(
                c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5),
                line)
            self.assertTrue(
                c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5),
                line)

            # Record part 1 data
            if part == "@Part1":
                part1_data.add(c1)

        # Perform tests for all other data: any single character that was not
        # recorded from Part1 must be unchanged by all four normalizations.
        for c in range(sys.maxunicode + 1):
            X = chr(c)
            if X in part1_data:
                continue
            self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)

    def test_bug_834676(self):
        """Regression check for bug 834676: this call must not raise."""
        # Check for bug 834676
        normalize('NFC', '\ud55c\uae00')


if __name__ == "__main__":
    unittest.main()