Home | History | Annotate | Download | only in test
      1 import io
      2 import shlex
      3 import string
      4 import unittest
      5 
      6 
      7 
      8 # The original test data set was from shellwords, by Hartmut Goebel.
      9 
     10 data = r"""x|x|
     11 foo bar|foo|bar|
     12  foo bar|foo|bar|
     13  foo bar |foo|bar|
     14 foo   bar    bla     fasel|foo|bar|bla|fasel|
     15 x y  z              xxxx|x|y|z|xxxx|
     16 \x bar|\|x|bar|
     17 \ x bar|\|x|bar|
     18 \ bar|\|bar|
     19 foo \x bar|foo|\|x|bar|
     20 foo \ x bar|foo|\|x|bar|
     21 foo \ bar|foo|\|bar|
     22 foo "bar" bla|foo|"bar"|bla|
     23 "foo" "bar" "bla"|"foo"|"bar"|"bla"|
     24 "foo" bar "bla"|"foo"|bar|"bla"|
     25 "foo" bar bla|"foo"|bar|bla|
     26 foo 'bar' bla|foo|'bar'|bla|
     27 'foo' 'bar' 'bla'|'foo'|'bar'|'bla'|
     28 'foo' bar 'bla'|'foo'|bar|'bla'|
     29 'foo' bar bla|'foo'|bar|bla|
     30 blurb foo"bar"bar"fasel" baz|blurb|foo"bar"bar"fasel"|baz|
     31 blurb foo'bar'bar'fasel' baz|blurb|foo'bar'bar'fasel'|baz|
     32 ""|""|
     33 ''|''|
     34 foo "" bar|foo|""|bar|
     35 foo '' bar|foo|''|bar|
     36 foo "" "" "" bar|foo|""|""|""|bar|
     37 foo '' '' '' bar|foo|''|''|''|bar|
     38 \""|\|""|
     39 "\"|"\"|
     40 "foo\ bar"|"foo\ bar"|
     41 "foo\\ bar"|"foo\\ bar"|
     42 "foo\\ bar\"|"foo\\ bar\"|
     43 "foo\\" bar\""|"foo\\"|bar|\|""|
     44 "foo\\ bar\" dfadf"|"foo\\ bar\"|dfadf"|
     45 "foo\\\ bar\" dfadf"|"foo\\\ bar\"|dfadf"|
     46 "foo\\\x bar\" dfadf"|"foo\\\x bar\"|dfadf"|
     47 "foo\x bar\" dfadf"|"foo\x bar\"|dfadf"|
     48 \''|\|''|
     49 'foo\ bar'|'foo\ bar'|
     50 'foo\\ bar'|'foo\\ bar'|
     51 "foo\\\x bar\" df'a\ 'df'|"foo\\\x bar\"|df'a|\|'df'|
     52 \"foo"|\|"foo"|
     53 \"foo"\x|\|"foo"|\|x|
     54 "foo\x"|"foo\x"|
     55 "foo\ "|"foo\ "|
     56 foo\ xx|foo|\|xx|
     57 foo\ x\x|foo|\|x|\|x|
     58 foo\ x\x\""|foo|\|x|\|x|\|""|
     59 "foo\ x\x"|"foo\ x\x"|
     60 "foo\ x\x\\"|"foo\ x\x\\"|
     61 "foo\ x\x\\""foobar"|"foo\ x\x\\"|"foobar"|
     62 "foo\ x\x\\"\''"foobar"|"foo\ x\x\\"|\|''|"foobar"|
     63 "foo\ x\x\\"\'"fo'obar"|"foo\ x\x\\"|\|'"fo'|obar"|
     64 "foo\ x\x\\"\'"fo'obar" 'don'\''t'|"foo\ x\x\\"|\|'"fo'|obar"|'don'|\|''|t'|
     65 'foo\ bar'|'foo\ bar'|
     66 'foo\\ bar'|'foo\\ bar'|
     67 foo\ bar|foo|\|bar|
     68 foo#bar\nbaz|foobaz|
     69 :-) ;-)|:|-|)|;|-|)|
     70 ||||||
     71 """
     72 
     73 posix_data = r"""x|x|
     74 foo bar|foo|bar|
     75  foo bar|foo|bar|
     76  foo bar |foo|bar|
     77 foo   bar    bla     fasel|foo|bar|bla|fasel|
     78 x y  z              xxxx|x|y|z|xxxx|
     79 \x bar|x|bar|
     80 \ x bar| x|bar|
     81 \ bar| bar|
     82 foo \x bar|foo|x|bar|
     83 foo \ x bar|foo| x|bar|
     84 foo \ bar|foo| bar|
     85 foo "bar" bla|foo|bar|bla|
     86 "foo" "bar" "bla"|foo|bar|bla|
     87 "foo" bar "bla"|foo|bar|bla|
     88 "foo" bar bla|foo|bar|bla|
     89 foo 'bar' bla|foo|bar|bla|
     90 'foo' 'bar' 'bla'|foo|bar|bla|
     91 'foo' bar 'bla'|foo|bar|bla|
     92 'foo' bar bla|foo|bar|bla|
     93 blurb foo"bar"bar"fasel" baz|blurb|foobarbarfasel|baz|
     94 blurb foo'bar'bar'fasel' baz|blurb|foobarbarfasel|baz|
     95 ""||
     96 ''||
     97 foo "" bar|foo||bar|
     98 foo '' bar|foo||bar|
     99 foo "" "" "" bar|foo||||bar|
    100 foo '' '' '' bar|foo||||bar|
    101 \"|"|
    102 "\""|"|
    103 "foo\ bar"|foo\ bar|
    104 "foo\\ bar"|foo\ bar|
    105 "foo\\ bar\""|foo\ bar"|
    106 "foo\\" bar\"|foo\|bar"|
    107 "foo\\ bar\" dfadf"|foo\ bar" dfadf|
    108 "foo\\\ bar\" dfadf"|foo\\ bar" dfadf|
    109 "foo\\\x bar\" dfadf"|foo\\x bar" dfadf|
    110 "foo\x bar\" dfadf"|foo\x bar" dfadf|
    111 \'|'|
    112 'foo\ bar'|foo\ bar|
    113 'foo\\ bar'|foo\\ bar|
    114 "foo\\\x bar\" df'a\ 'df"|foo\\x bar" df'a\ 'df|
    115 \"foo|"foo|
    116 \"foo\x|"foox|
    117 "foo\x"|foo\x|
    118 "foo\ "|foo\ |
    119 foo\ xx|foo xx|
    120 foo\ x\x|foo xx|
    121 foo\ x\x\"|foo xx"|
    122 "foo\ x\x"|foo\ x\x|
    123 "foo\ x\x\\"|foo\ x\x\|
    124 "foo\ x\x\\""foobar"|foo\ x\x\foobar|
    125 "foo\ x\x\\"\'"foobar"|foo\ x\x\'foobar|
    126 "foo\ x\x\\"\'"fo'obar"|foo\ x\x\'fo'obar|
    127 "foo\ x\x\\"\'"fo'obar" 'don'\''t'|foo\ x\x\'fo'obar|don't|
    128 "foo\ x\x\\"\'"fo'obar" 'don'\''t' \\|foo\ x\x\'fo'obar|don't|\|
    129 'foo\ bar'|foo\ bar|
    130 'foo\\ bar'|foo\\ bar|
    131 foo\ bar|foo bar|
    132 foo#bar\nbaz|foo|baz|
    133 :-) ;-)|:-)|;-)|
    134 ||
    135 """
    136 
    137 class ShlexTest(unittest.TestCase):
    138     def setUp(self):
    139         self.data = [x.split("|")[:-1]
    140                      for x in data.splitlines()]
    141         self.posix_data = [x.split("|")[:-1]
    142                            for x in posix_data.splitlines()]
    143         for item in self.data:
    144             item[0] = item[0].replace(r"\n", "\n")
    145         for item in self.posix_data:
    146             item[0] = item[0].replace(r"\n", "\n")
    147 
    148     def splitTest(self, data, comments):
    149         for i in range(len(data)):
    150             l = shlex.split(data[i][0], comments=comments)
    151             self.assertEqual(l, data[i][1:],
    152                              "%s: %s != %s" %
    153                              (data[i][0], l, data[i][1:]))
    154 
    155     def oldSplit(self, s):
    156         ret = []
    157         lex = shlex.shlex(io.StringIO(s))
    158         tok = lex.get_token()
    159         while tok:
    160             ret.append(tok)
    161             tok = lex.get_token()
    162         return ret
    163 
    164     def testSplitPosix(self):
    165         """Test data splitting with posix parser"""
    166         self.splitTest(self.posix_data, comments=True)
    167 
    168     def testCompat(self):
    169         """Test compatibility interface"""
    170         for i in range(len(self.data)):
    171             l = self.oldSplit(self.data[i][0])
    172             self.assertEqual(l, self.data[i][1:],
    173                              "%s: %s != %s" %
    174                              (self.data[i][0], l, self.data[i][1:]))
    175 
    176     def testSyntaxSplitAmpersandAndPipe(self):
    177         """Test handling of syntax splitting of &, |"""
    178         # Could take these forms: &&, &, |&, ;&, ;;&
    179         # of course, the same applies to | and ||
    180         # these should all parse to the same output
    181         for delimiter in ('&&', '&', '|&', ';&', ';;&',
    182                           '||', '|', '&|', ';|', ';;|'):
    183             src = ['echo hi %s echo bye' % delimiter,
    184                    'echo hi%secho bye' % delimiter]
    185             ref = ['echo', 'hi', delimiter, 'echo', 'bye']
    186             for ss in src:
    187                 s = shlex.shlex(ss, punctuation_chars=True)
    188                 result = list(s)
    189                 self.assertEqual(ref, result, "While splitting '%s'" % ss)
    190 
    191     def testSyntaxSplitSemicolon(self):
    192         """Test handling of syntax splitting of ;"""
    193         # Could take these forms: ;, ;;, ;&, ;;&
    194         # these should all parse to the same output
    195         for delimiter in (';', ';;', ';&', ';;&'):
    196             src = ['echo hi %s echo bye' % delimiter,
    197                    'echo hi%s echo bye' % delimiter,
    198                    'echo hi%secho bye' % delimiter]
    199             ref = ['echo', 'hi', delimiter, 'echo', 'bye']
    200             for ss in src:
    201                 s = shlex.shlex(ss, punctuation_chars=True)
    202                 result = list(s)
    203                 self.assertEqual(ref, result, "While splitting '%s'" % ss)
    204 
    205     def testSyntaxSplitRedirect(self):
    206         """Test handling of syntax splitting of >"""
    207         # of course, the same applies to <, |
    208         # these should all parse to the same output
    209         for delimiter in ('<', '|'):
    210             src = ['echo hi %s out' % delimiter,
    211                    'echo hi%s out' % delimiter,
    212                    'echo hi%sout' % delimiter]
    213             ref = ['echo', 'hi', delimiter, 'out']
    214             for ss in src:
    215                 s = shlex.shlex(ss, punctuation_chars=True)
    216                 result = list(s)
    217                 self.assertEqual(ref, result, "While splitting '%s'" % ss)
    218 
    219     def testSyntaxSplitParen(self):
    220         """Test handling of syntax splitting of ()"""
    221         # these should all parse to the same output
    222         src = ['( echo hi )',
    223                '(echo hi)']
    224         ref = ['(', 'echo', 'hi', ')']
    225         for ss in src:
    226             s = shlex.shlex(ss, punctuation_chars=True)
    227             result = list(s)
    228             self.assertEqual(ref, result, "While splitting '%s'" % ss)
    229 
    230     def testSyntaxSplitCustom(self):
    231         """Test handling of syntax splitting with custom chars"""
    232         ref = ['~/a', '&', '&', 'b-c', '--color=auto', '||', 'd', '*.py?']
    233         ss = "~/a && b-c --color=auto || d *.py?"
    234         s = shlex.shlex(ss, punctuation_chars="|")
    235         result = list(s)
    236         self.assertEqual(ref, result, "While splitting '%s'" % ss)
    237 
    238     def testTokenTypes(self):
    239         """Test that tokens are split with types as expected."""
    240         for source, expected in (
    241                                 ('a && b || c',
    242                                  [('a', 'a'), ('&&', 'c'), ('b', 'a'),
    243                                   ('||', 'c'), ('c', 'a')]),
    244                               ):
    245             s = shlex.shlex(source, punctuation_chars=True)
    246             observed = []
    247             while True:
    248                 t = s.get_token()
    249                 if t == s.eof:
    250                     break
    251                 if t[0] in s.punctuation_chars:
    252                     tt = 'c'
    253                 else:
    254                     tt = 'a'
    255                 observed.append((t, tt))
    256             self.assertEqual(observed, expected)
    257 
    258     def testPunctuationInWordChars(self):
    259         """Test that any punctuation chars are removed from wordchars"""
    260         s = shlex.shlex('a_b__c', punctuation_chars='_')
    261         self.assertNotIn('_', s.wordchars)
    262         self.assertEqual(list(s), ['a', '_', 'b', '__', 'c'])
    263 
    264     def testPunctuationWithWhitespaceSplit(self):
    265         """Test that with whitespace_split, behaviour is as expected"""
    266         s = shlex.shlex('a  && b  ||  c', punctuation_chars='&')
    267         # whitespace_split is False, so splitting will be based on
    268         # punctuation_chars
    269         self.assertEqual(list(s), ['a', '&&', 'b', '|', '|', 'c'])
    270         s = shlex.shlex('a  && b  ||  c', punctuation_chars='&')
    271         s.whitespace_split = True
    272         # whitespace_split is True, so splitting will be based on
    273         # white space
    274         self.assertEqual(list(s), ['a', '&&', 'b', '||', 'c'])
    275 
    276     def testPunctuationWithPosix(self):
    277         """Test that punctuation_chars and posix behave correctly together."""
    278         # see Issue #29132
    279         s = shlex.shlex('f >"abc"', posix=True, punctuation_chars=True)
    280         self.assertEqual(list(s), ['f', '>', 'abc'])
    281         s = shlex.shlex('f >\\"abc\\"', posix=True, punctuation_chars=True)
    282         self.assertEqual(list(s), ['f', '>', '"abc"'])
    283 
    284     def testEmptyStringHandling(self):
    285         """Test that parsing of empty strings is correctly handled."""
    286         # see Issue #21999
    287         expected = ['', ')', 'abc']
    288         for punct in (False, True):
    289             s = shlex.shlex("'')abc", posix=True, punctuation_chars=punct)
    290             slist = list(s)
    291             self.assertEqual(slist, expected)
    292         expected = ["''", ')', 'abc']
    293         s = shlex.shlex("'')abc", punctuation_chars=True)
    294         self.assertEqual(list(s), expected)
    295 
    296     def testQuote(self):
    297         safeunquoted = string.ascii_letters + string.digits + '@%_-+=:,./'
    298         unicode_sample = '\xe9\xe0\xdf'  # e + acute accent, a + grave, sharp s
    299         unsafe = '"`$\\!' + unicode_sample
    300 
    301         self.assertEqual(shlex.quote(''), "''")
    302         self.assertEqual(shlex.quote(safeunquoted), safeunquoted)
    303         self.assertEqual(shlex.quote('test file name'), "'test file name'")
    304         for u in unsafe:
    305             self.assertEqual(shlex.quote('test%sname' % u),
    306                              "'test%sname'" % u)
    307         for u in unsafe:
    308             self.assertEqual(shlex.quote("test%s'name'" % u),
    309                              "'test%s'\"'\"'name'\"'\"''" % u)
    310 
    311 # Allow this test to be used with old shlex.py
    312 if not getattr(shlex, "split", None):
    313     for methname in dir(ShlexTest):
    314         if methname.startswith("test") and methname != "testCompat":
    315             delattr(ShlexTest, methname)
    316 
    317 if __name__ == "__main__":
    318     unittest.main()
    319