"""Table-driven and targeted unit tests for the standard-library shlex module."""

import io
import shlex
import string
import unittest


# The original test data set was from shellwords, by Hartmut Goebel.
#
# Table format: one case per line, fields terminated by "|".  The first field
# is the input string, the remaining fields are the expected tokens.  A
# literal backslash-n in the input field is turned into a real newline when
# the table is parsed.
#
# The first rows vary leading, trailing and repeated whitespace, which must
# not change the resulting tokens.
#
# The final (non-ASCII) row of each table is appended with escape sequences
# instead of literal characters so that it survives encoding-lossy tooling.

# Expected tokens for the non-POSIX (compatibility) lexer.
data = r"""x|x|
foo bar|foo|bar|
 foo bar|foo|bar|
 foo bar |foo|bar|
foo   bar    bla     fasel|foo|bar|bla|fasel|
x y  z              xxxx|x|y|z|xxxx|
\x bar|\|x|bar|
\ x bar|\|x|bar|
\ bar|\|bar|
foo \x bar|foo|\|x|bar|
foo \ x bar|foo|\|x|bar|
foo \ bar|foo|\|bar|
foo "bar" bla|foo|"bar"|bla|
"foo" "bar" "bla"|"foo"|"bar"|"bla"|
"foo" bar "bla"|"foo"|bar|"bla"|
"foo" bar bla|"foo"|bar|bla|
foo 'bar' bla|foo|'bar'|bla|
'foo' 'bar' 'bla'|'foo'|'bar'|'bla'|
'foo' bar 'bla'|'foo'|bar|'bla'|
'foo' bar bla|'foo'|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foo"bar"bar"fasel"|baz|
blurb foo'bar'bar'fasel' baz|blurb|foo'bar'bar'fasel'|baz|
""|""|
''|''|
foo "" bar|foo|""|bar|
foo '' bar|foo|''|bar|
foo "" "" "" bar|foo|""|""|""|bar|
foo '' '' '' bar|foo|''|''|''|bar|
\""|\|""|
"\"|"\"|
"foo\ bar"|"foo\ bar"|
"foo\\ bar"|"foo\\ bar"|
"foo\\ bar\"|"foo\\ bar\"|
"foo\\" bar\""|"foo\\"|bar|\|""|
"foo\\ bar\" dfadf"|"foo\\ bar\"|dfadf"|
"foo\\\ bar\" dfadf"|"foo\\\ bar\"|dfadf"|
"foo\\\x bar\" dfadf"|"foo\\\x bar\"|dfadf"|
"foo\x bar\" dfadf"|"foo\x bar\"|dfadf"|
\''|\|''|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
"foo\\\x bar\" df'a\ 'df'|"foo\\\x bar\"|df'a|\|'df'|
\"foo"|\|"foo"|
\"foo"\x|\|"foo"|\|x|
"foo\x"|"foo\x"|
"foo\ "|"foo\ "|
foo\ xx|foo|\|xx|
foo\ x\x|foo|\|x|\|x|
foo\ x\x\""|foo|\|x|\|x|\|""|
"foo\ x\x"|"foo\ x\x"|
"foo\ x\x\\"|"foo\ x\x\\"|
"foo\ x\x\\""foobar"|"foo\ x\x\\"|"foobar"|
"foo\ x\x\\"\''"foobar"|"foo\ x\x\\"|\|''|"foobar"|
"foo\ x\x\\"\'"fo'obar"|"foo\ x\x\\"|\|'"fo'|obar"|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|"foo\ x\x\\"|\|'"fo'|obar"|'don'|\|''|t'|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
foo\ bar|foo|\|bar|
foo#bar\nbaz|foobaz|
:-) ;-)|:|-|)|;|-|)|
""" + "\xe1\xe9\xed\xf3\xfa|\xe1|\xe9|\xed|\xf3|\xfa|\n"

# Expected tokens for the POSIX lexer as driven by shlex.split().
posix_data = r"""x|x|
foo bar|foo|bar|
 foo bar|foo|bar|
 foo bar |foo|bar|
foo   bar    bla     fasel|foo|bar|bla|fasel|
x y  z              xxxx|x|y|z|xxxx|
\x bar|x|bar|
\ x bar| x|bar|
\ bar| bar|
foo \x bar|foo|x|bar|
foo \ x bar|foo| x|bar|
foo \ bar|foo| bar|
foo "bar" bla|foo|bar|bla|
"foo" "bar" "bla"|foo|bar|bla|
"foo" bar "bla"|foo|bar|bla|
"foo" bar bla|foo|bar|bla|
foo 'bar' bla|foo|bar|bla|
'foo' 'bar' 'bla'|foo|bar|bla|
'foo' bar 'bla'|foo|bar|bla|
'foo' bar bla|foo|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foobarbarfasel|baz|
blurb foo'bar'bar'fasel' baz|blurb|foobarbarfasel|baz|
""||
''||
foo "" bar|foo||bar|
foo '' bar|foo||bar|
foo "" "" "" bar|foo||||bar|
foo '' '' '' bar|foo||||bar|
\"|"|
"\""|"|
"foo\ bar"|foo\ bar|
"foo\\ bar"|foo\ bar|
"foo\\ bar\""|foo\ bar"|
"foo\\" bar\"|foo\|bar"|
"foo\\ bar\" dfadf"|foo\ bar" dfadf|
"foo\\\ bar\" dfadf"|foo\\ bar" dfadf|
"foo\\\x bar\" dfadf"|foo\\x bar" dfadf|
"foo\x bar\" dfadf"|foo\x bar" dfadf|
\'|'|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
"foo\\\x bar\" df'a\ 'df"|foo\\x bar" df'a\ 'df|
\"foo|"foo|
\"foo\x|"foox|
"foo\x"|foo\x|
"foo\ "|foo\ |
foo\ xx|foo xx|
foo\ x\x|foo xx|
foo\ x\x\"|foo xx"|
"foo\ x\x"|foo\ x\x|
"foo\ x\x\\"|foo\ x\x\|
"foo\ x\x\\""foobar"|foo\ x\x\foobar|
"foo\ x\x\\"\'"foobar"|foo\ x\x\'foobar|
"foo\ x\x\\"\'"fo'obar"|foo\ x\x\'fo'obar|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|foo\ x\x\'fo'obar|don't|
"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\|foo\ x\x\'fo'obar|don't|\|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
foo\ bar|foo bar|
foo#bar\nbaz|foo|baz|
:-) ;-)|:-)|;-)|
""" + "\xe1\xe9\xed\xf3\xfa|\xe1\xe9\xed\xf3\xfa|\n"


def _parse_rows(table):
    """Turn a "|"-terminated table into a list of [input, token, ...] rows."""
    rows = []
    for line in table.splitlines():
        # Every field is terminated by "|", so the piece after the last
        # separator is always empty and is dropped.
        fields = line.split("|")[:-1]
        # The tables are raw strings; a newline inside an input is spelled
        # as the two characters backslash + "n".
        fields[0] = fields[0].replace(r"\n", "\n")
        rows.append(fields)
    return rows


class ShlexTest(unittest.TestCase):
    """Tests for shlex splitting, punctuation handling and quoting."""

    def setUp(self):
        """Parse both data tables into lists of [input, token, ...] rows."""
        self.data = _parse_rows(data)
        self.posix_data = _parse_rows(posix_data)

    def splitTest(self, data, comments):
        """Check shlex.split() against every [input, token, ...] row of data."""
        for source, *expected in data:
            result = shlex.split(source, comments=comments)
            self.assertEqual(result, expected,
                             "%s: %s != %s" % (source, result, expected))

    def oldSplit(self, s):
        """Tokenize s with a non-POSIX shlex instance reading from a stream."""
        ret = []
        lex = shlex.shlex(io.StringIO(s))
        tok = lex.get_token()
        # The loop ends on the first falsy token returned by get_token().
        while tok:
            ret.append(tok)
            tok = lex.get_token()
        return ret

    def _check_punctuation_split(self, sources, expected,
                                 punctuation_chars=True):
        """Assert that every source string lexes to the expected tokens."""
        for source in sources:
            lexer = shlex.shlex(source, punctuation_chars=punctuation_chars)
            self.assertEqual(expected, list(lexer),
                             "While splitting '%s'" % source)

    def testSplitPosix(self):
        """Test data splitting with posix parser"""
        self.splitTest(self.posix_data, comments=True)

    def testCompat(self):
        """Test compatibility interface"""
        for source, *expected in self.data:
            result = self.oldSplit(source)
            self.assertEqual(result, expected,
                             "%s: %s != %s" % (source, result, expected))

    def testSyntaxSplitAmpersandAndPipe(self):
        """Test handling of syntax splitting of &, |"""
        # Could take these forms: &&, &, |&, ;&, ;;&
        # of course, the same applies to | and ||
        # these should all parse to the same output
        for delimiter in ('&&', '&', '|&', ';&', ';;&',
                          '||', '|', '&|', ';|', ';;|'):
            src = ['echo hi %s echo bye' % delimiter,
                   'echo hi%secho bye' % delimiter]
            ref = ['echo', 'hi', delimiter, 'echo', 'bye']
            self._check_punctuation_split(src, ref)

    def testSyntaxSplitSemicolon(self):
        """Test handling of syntax splitting of ;"""
        # Could take these forms: ;, ;;, ;&, ;;&
        # these should all parse to the same output
        for delimiter in (';', ';;', ';&', ';;&'):
            src = ['echo hi %s echo bye' % delimiter,
                   'echo hi%s echo bye' % delimiter,
                   'echo hi%secho bye' % delimiter]
            ref = ['echo', 'hi', delimiter, 'echo', 'bye']
            self._check_punctuation_split(src, ref)

    def testSyntaxSplitRedirect(self):
        """Test handling of syntax splitting of >"""
        # of course, the same applies to <, |
        # these should all parse to the same output
        for delimiter in ('<', '>', '|'):
            src = ['echo hi %s out' % delimiter,
                   'echo hi%s out' % delimiter,
                   'echo hi%sout' % delimiter]
            ref = ['echo', 'hi', delimiter, 'out']
            self._check_punctuation_split(src, ref)

    def testSyntaxSplitParen(self):
        """Test handling of syntax splitting of ()"""
        # these should all parse to the same output
        src = ['( echo hi )',
               '(echo hi)']
        ref = ['(', 'echo', 'hi', ')']
        self._check_punctuation_split(src, ref)

    def testSyntaxSplitCustom(self):
        """Test handling of syntax splitting with custom chars"""
        # Only "|" is declared as punctuation, so "||" stays together while
        # each "&" comes out as its own token.
        ref = ['~/a', '&', '&', 'b-c', '--color=auto', '||', 'd', '*.py?']
        ss = "~/a && b-c --color=auto || d *.py?"
        self._check_punctuation_split([ss], ref, punctuation_chars="|")

    def testTokenTypes(self):
        """Test that tokens are split with types as expected."""
        for source, expected in (
                                ('a && b || c',
                                 [('a', 'a'), ('&&', 'c'), ('b', 'a'),
                                  ('||', 'c'), ('c', 'a')]),
                              ):
            s = shlex.shlex(source, punctuation_chars=True)
            observed = []
            while True:
                t = s.get_token()
                if t == s.eof:
                    break
                # Classify by first character: 'c' for a punctuation run,
                # 'a' for anything else.
                if t[0] in s.punctuation_chars:
                    tt = 'c'
                else:
                    tt = 'a'
                observed.append((t, tt))
            self.assertEqual(observed, expected)

    def testPunctuationInWordChars(self):
        """Test that any punctuation chars are removed from wordchars"""
        s = shlex.shlex('a_b__c', punctuation_chars='_')
        self.assertNotIn('_', s.wordchars)
        self.assertEqual(list(s), ['a', '_', 'b', '__', 'c'])

    def testPunctuationWithWhitespaceSplit(self):
        """Test that with whitespace_split, behaviour is as expected"""
        s = shlex.shlex('a && b || c', punctuation_chars='&')
        # whitespace_split is False, so splitting will be based on
        # punctuation_chars
        self.assertEqual(list(s), ['a', '&&', 'b', '|', '|', 'c'])
        s = shlex.shlex('a && b || c', punctuation_chars='&')
        s.whitespace_split = True
        # whitespace_split is True, so splitting will be based on
        # white space
        self.assertEqual(list(s), ['a', '&&', 'b', '||', 'c'])

    def testPunctuationWithPosix(self):
        """Test that punctuation_chars and posix behave correctly together."""
        # see Issue #29132
        s = shlex.shlex('f >"abc"', posix=True, punctuation_chars=True)
        self.assertEqual(list(s), ['f', '>', 'abc'])
        s = shlex.shlex('f >\\"abc\\"', posix=True, punctuation_chars=True)
        self.assertEqual(list(s), ['f', '>', '"abc"'])

    def testEmptyStringHandling(self):
        """Test that parsing of empty strings is correctly handled."""
        # see Issue #21999
        expected = ['', ')', 'abc']
        for punct in (False, True):
            s = shlex.shlex("'')abc", posix=True, punctuation_chars=punct)
            slist = list(s)
            self.assertEqual(slist, expected)
        # Without posix=True the quotes are kept as part of the token.
        expected = ["''", ')', 'abc']
        s = shlex.shlex("'')abc", punctuation_chars=True)
        self.assertEqual(list(s), expected)

    def testQuote(self):
        """Test shlex.quote() on empty, safe and unsafe strings."""
        safeunquoted = string.ascii_letters + string.digits + '@%_-+=:,./'
        unicode_sample = '\xe9\xe0\xdf'  # e + acute accent, a + grave, sharp s
        unsafe = '"`$\\!' + unicode_sample

        self.assertEqual(shlex.quote(''), "''")
        self.assertEqual(shlex.quote(safeunquoted), safeunquoted)
        self.assertEqual(shlex.quote('test file name'), "'test file name'")
        for u in unsafe:
            self.assertEqual(shlex.quote('test%sname' % u),
                             "'test%sname'" % u)
        for u in unsafe:
            self.assertEqual(shlex.quote("test%s'name'" % u),
                             "'test%s'\"'\"'name'\"'\"''" % u)


# Allow this test to be used with old shlex.py
if not getattr(shlex, "split", None):
    for methname in dir(ShlexTest):
        if methname.startswith("test") and methname != "testCompat":
            delattr(ShlexTest, methname)

if __name__ == "__main__":
    unittest.main()