Home | History | Annotate | Download | only in tests
      1 #
      2 # Copyright (c) 2008-2012 Stefan Krah. All rights reserved.
      3 #
      4 # Redistribution and use in source and binary forms, with or without
      5 # modification, are permitted provided that the following conditions
      6 # are met:
      7 #
      8 # 1. Redistributions of source code must retain the above copyright
      9 #    notice, this list of conditions and the following disclaimer.
     10 #
     11 # 2. Redistributions in binary form must reproduce the above copyright
     12 #    notice, this list of conditions and the following disclaimer in the
     13 #    documentation and/or other materials provided with the distribution.
     14 #
     15 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
     16 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     18 # ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     21 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25 # SUCH DAMAGE.
     26 #
     27 
     28 
     29 # Generate PEP-3101 format strings.
     30 
     31 
     32 import os, sys, locale, random
     33 import platform, subprocess
     34 from test.support import import_fresh_module
     35 from distutils.spawn import find_executable
     36 
# Two independently imported copies of the decimal module:
#   C -- imported with '_decimal' listed as a fresh import (presumably the
#        C-accelerated implementation; confirm against the documentation of
#        test.support.import_fresh_module).
#   P -- imported with '_decimal' blocked (presumably the pure-Python
#        implementation).  The helpers in this file format via P.Decimal.
C = import_fresh_module('decimal', fresh=['_decimal'])
P = import_fresh_module('decimal', blocked=['_decimal'])
     39 
     40 
     41 windows_lang_strings = [
     42   "chinese", "chinese-simplified", "chinese-traditional", "czech", "danish",
     43   "dutch", "belgian", "english", "australian", "canadian", "english-nz",
     44   "english-uk", "english-us", "finnish", "french", "french-belgian",
     45   "french-canadian", "french-swiss", "german", "german-austrian",
     46   "german-swiss", "greek", "hungarian", "icelandic", "italian", "italian-swiss",
     47   "japanese", "korean", "norwegian", "norwegian-bokmal", "norwegian-nynorsk",
     48   "polish", "portuguese", "portuguese-brazil", "russian", "slovak", "spanish",
     49   "spanish-mexican", "spanish-modern", "swedish", "turkish",
     50 ]
     51 
     52 preferred_encoding = {
     53   'cs_CZ': 'ISO8859-2',
     54   'cs_CZ.iso88592': 'ISO8859-2',
     55   'czech': 'ISO8859-2',
     56   'eesti': 'ISO8859-1',
     57   'estonian': 'ISO8859-1',
     58   'et_EE': 'ISO8859-15',
     59   'et_EE.ISO-8859-15': 'ISO8859-15',
     60   'et_EE.iso885915': 'ISO8859-15',
     61   'et_EE.iso88591': 'ISO8859-1',
     62   'fi_FI.iso88591': 'ISO8859-1',
     63   'fi_FI': 'ISO8859-15',
     64   'fi_FI@euro': 'ISO8859-15',
     65   'fi_FI.iso885915@euro': 'ISO8859-15',
     66   'finnish': 'ISO8859-1',
     67   'lv_LV': 'ISO8859-13',
     68   'lv_LV.iso885913': 'ISO8859-13',
     69   'nb_NO': 'ISO8859-1',
     70   'nb_NO.iso88591': 'ISO8859-1',
     71   'bokmal': 'ISO8859-1',
     72   'nn_NO': 'ISO8859-1',
     73   'nn_NO.iso88591': 'ISO8859-1',
     74   'no_NO': 'ISO8859-1',
     75   'norwegian': 'ISO8859-1',
     76   'nynorsk': 'ISO8859-1',
     77   'ru_RU': 'ISO8859-5',
     78   'ru_RU.iso88595': 'ISO8859-5',
     79   'russian': 'ISO8859-5',
     80   'ru_RU.KOI8-R': 'KOI8-R',
     81   'ru_RU.koi8r': 'KOI8-R',
     82   'ru_RU.CP1251': 'CP1251',
     83   'ru_RU.cp1251': 'CP1251',
     84   'sk_SK': 'ISO8859-2',
     85   'sk_SK.iso88592': 'ISO8859-2',
     86   'slovak': 'ISO8859-2',
     87   'sv_FI': 'ISO8859-1',
     88   'sv_FI.iso88591': 'ISO8859-1',
     89   'sv_FI@euro': 'ISO8859-15',
     90   'sv_FI.iso885915@euro': 'ISO8859-15',
     91   'uk_UA': 'KOI8-U',
     92   'uk_UA.koi8u': 'KOI8-U'
     93 }
     94 
     95 integers = [
     96   "",
     97   "1",
     98   "12",
     99   "123",
    100   "1234",
    101   "12345",
    102   "123456",
    103   "1234567",
    104   "12345678",
    105   "123456789",
    106   "1234567890",
    107   "12345678901",
    108   "123456789012",
    109   "1234567890123",
    110   "12345678901234",
    111   "123456789012345",
    112   "1234567890123456",
    113   "12345678901234567",
    114   "123456789012345678",
    115   "1234567890123456789",
    116   "12345678901234567890",
    117   "123456789012345678901",
    118   "1234567890123456789012",
    119 ]
    120 
    121 numbers = [
    122   "0", "-0", "+0",
    123   "0.0", "-0.0", "+0.0",
    124   "0e0", "-0e0", "+0e0",
    125   ".0", "-.0",
    126   ".1", "-.1",
    127   "1.1", "-1.1",
    128   "1e1", "-1e1"
    129 ]
    130 
    131 # Get the list of available locales.
    132 if platform.system() == 'Windows':
    133     locale_list = windows_lang_strings
    134 else:
    135     locale_list = ['C']
    136     if os.path.isfile("/var/lib/locales/supported.d/local"):
    137         # On Ubuntu, `locale -a` gives the wrong case for some locales,
    138         # so we get the correct names directly:
    139         with open("/var/lib/locales/supported.d/local") as f:
    140             locale_list = [loc.split()[0] for loc in f.readlines() \
    141                            if not loc.startswith('#')]
    142     elif find_executable('locale'):
    143         locale_list = subprocess.Popen(["locale", "-a"],
    144                           stdout=subprocess.PIPE).communicate()[0]
    145         try:
    146             locale_list = locale_list.decode()
    147         except UnicodeDecodeError:
    148             # Some distributions insist on using latin-1 characters
    149             # in their locale names.
    150             locale_list = locale_list.decode('latin-1')
    151         locale_list = locale_list.split('\n')
    152 try:
    153     locale_list.remove('')
    154 except ValueError:
    155     pass
    156 
    157 # Debian
    158 if os.path.isfile("/etc/locale.alias"):
    159     with open("/etc/locale.alias") as f:
    160         while 1:
    161             try:
    162                 line = f.readline()
    163             except UnicodeDecodeError:
    164                 continue
    165             if line == "":
    166                 break
    167             if line.startswith('#'):
    168                 continue
    169             x = line.split()
    170             if len(x) == 2:
    171                 if x[0] in locale_list:
    172                     locale_list.remove(x[0])
    173 
    174 # FreeBSD
    175 if platform.system() == 'FreeBSD':
    176     # http://www.freebsd.org/cgi/query-pr.cgi?pr=142173
    177     # en_GB.US-ASCII has 163 as the currency symbol.
    178     for loc in ['it_CH.ISO8859-1', 'it_CH.ISO8859-15', 'it_CH.UTF-8',
    179                 'it_IT.ISO8859-1', 'it_IT.ISO8859-15', 'it_IT.UTF-8',
    180                 'sl_SI.ISO8859-2', 'sl_SI.UTF-8',
    181                 'en_GB.US-ASCII']:
    182         try:
    183             locale_list.remove(loc)
    184         except ValueError:
    185             pass
    186 
    187 # Print a testcase in the format of the IBM tests (for runtest.c):
    188 def get_preferred_encoding():
    189     loc = locale.setlocale(locale.LC_CTYPE)
    190     if loc in preferred_encoding:
    191         return preferred_encoding[loc]
    192     else:
    193         return locale.getpreferredencoding()
    194 
    195 def printit(testno, s, fmt, encoding=None):
    196     if not encoding:
    197         encoding = get_preferred_encoding()
    198     try:
    199         result = format(P.Decimal(s), fmt)
    200         fmt = str(fmt.encode(encoding))[2:-1]
    201         result = str(result.encode(encoding))[2:-1]
    202         if "'" in result:
    203             sys.stdout.write("xfmt%d  format  %s  '%s'  ->  \"%s\"\n"
    204                              % (testno, s, fmt, result))
    205         else:
    206             sys.stdout.write("xfmt%d  format  %s  '%s'  ->  '%s'\n"
    207                              % (testno, s, fmt, result))
    208     except Exception as err:
    209         sys.stderr.write("%s  %s  %s\n" % (err, s, fmt))
    210 
    211 
    212 # Check if an integer can be converted to a valid fill character.
    213 def check_fillchar(i):
    214     try:
    215         c = chr(i)
    216         c.encode('utf-8').decode()
    217         format(P.Decimal(0), c + '<19g')
    218         return c
    219     except:
    220         return None
    221 
    222 # Generate all unicode characters that are accepted as
    223 # fill characters by decimal.py.
    224 def all_fillchars():
    225     for i in range(0, 0x110002):
    226         c = check_fillchar(i)
    227         if c: yield c
    228 
    229 # Return random fill character.
    230 def rand_fillchar():
    231     while 1:
    232         i = random.randrange(0, 0x110002)
    233         c = check_fillchar(i)
    234         if c: return c
    235 
    236 # Generate random format strings
    237 # [[fill]align][sign][#][0][width][.precision][type]
    238 def rand_format(fill, typespec='EeGgFfn%'):
    239     active = sorted(random.sample(range(7), random.randrange(8)))
    240     have_align = 0
    241     s = ''
    242     for elem in active:
    243         if elem == 0: # fill+align
    244             s += fill
    245             s += random.choice('<>=^')
    246             have_align = 1
    247         elif elem == 1: # sign
    248             s += random.choice('+- ')
    249         elif elem == 2 and not have_align: # zeropad
    250             s += '0'
    251         elif elem == 3: # width
    252             s += str(random.randrange(1, 100))
    253         elif elem == 4: # thousands separator
    254             s += ','
    255         elif elem == 5: # prec
    256             s += '.'
    257             s += str(random.randrange(100))
    258         elif elem == 6:
    259             if 4 in active: c = typespec.replace('n', '')
    260             else: c = typespec
    261             s += random.choice(c)
    262     return s
    263 
    264 # Partially brute force all possible format strings containing a thousands
    265 # separator. Fall back to random where the runtime would become excessive.
    266 # [[fill]align][sign][#][0][width][,][.precision][type]
    267 def all_format_sep():
    268     for align in ('', '<', '>', '=', '^'):
    269         for fill in ('', 'x'):
    270             if align == '': fill = ''
    271             for sign in ('', '+', '-', ' '):
    272                 for zeropad in ('', '0'):
    273                     if align != '': zeropad = ''
    274                     for width in ['']+[str(y) for y in range(1, 15)]+['101']:
    275                         for prec in ['']+['.'+str(y) for y in range(15)]:
    276                             # for type in ('', 'E', 'e', 'G', 'g', 'F', 'f', '%'):
    277                             type = random.choice(('', 'E', 'e', 'G', 'g', 'F', 'f', '%'))
    278                             yield ''.join((fill, align, sign, zeropad, width, ',', prec, type))
    279 
    280 # Partially brute force all possible format strings with an 'n' specifier.
    281 # [[fill]align][sign][#][0][width][,][.precision][type]
    282 def all_format_loc():
    283     for align in ('', '<', '>', '=', '^'):
    284         for fill in ('', 'x'):
    285             if align == '': fill = ''
    286             for sign in ('', '+', '-', ' '):
    287                 for zeropad in ('', '0'):
    288                     if align != '': zeropad = ''
    289                     for width in ['']+[str(y) for y in range(1, 20)]+['101']:
    290                         for prec in ['']+['.'+str(y) for y in range(1, 20)]:
    291                             yield ''.join((fill, align, sign, zeropad, width, prec, 'n'))
    292 
    293 # Generate random format strings with a unicode fill character
    294 # [[fill]align][sign][#][0][width][,][.precision][type]
    295 def randfill(fill):
    296     active = sorted(random.sample(range(5), random.randrange(6)))
    297     s = ''
    298     s += str(fill)
    299     s += random.choice('<>=^')
    300     for elem in active:
    301         if elem == 0: # sign
    302             s += random.choice('+- ')
    303         elif elem == 1: # width
    304             s += str(random.randrange(1, 100))
    305         elif elem == 2: # thousands separator
    306             s += ','
    307         elif elem == 3: # prec
    308             s += '.'
    309             s += str(random.randrange(100))
    310         elif elem == 4:
    311             if 2 in active: c = 'EeGgFf%'
    312             else: c = 'EeGgFfn%'
    313             s += random.choice(c)
    314     return s
    315 
    316 # Generate random format strings with random locale setting
    317 # [[fill]align][sign][#][0][width][,][.precision][type]
    318 def rand_locale():
    319     try:
    320         loc = random.choice(locale_list)
    321         locale.setlocale(locale.LC_ALL, loc)
    322     except locale.Error as err:
    323         pass
    324     active = sorted(random.sample(range(5), random.randrange(6)))
    325     s = ''
    326     have_align = 0
    327     for elem in active:
    328         if elem == 0: # fill+align
    329             s += chr(random.randrange(32, 128))
    330             s += random.choice('<>=^')
    331             have_align = 1
    332         elif elem == 1: # sign
    333             s += random.choice('+- ')
    334         elif elem == 2 and not have_align: # zeropad
    335             s += '0'
    336         elif elem == 3: # width
    337             s += str(random.randrange(1, 100))
    338         elif elem == 4: # prec
    339             s += '.'
    340             s += str(random.randrange(100))
    341     s += 'n'
    342     return s
    343