1 import binascii 2 import io 3 import os 4 import re 5 import sys 6 import tempfile 7 import mimetypes 8 try: 9 import simplejson as json 10 except ImportError: 11 import json 12 import warnings 13 14 from webob.acceptparse import ( 15 AcceptLanguage, 16 AcceptCharset, 17 MIMEAccept, 18 MIMENilAccept, 19 NoAccept, 20 accept_property, 21 ) 22 23 from webob.cachecontrol import ( 24 CacheControl, 25 serialize_cache_control, 26 ) 27 28 from webob.compat import ( 29 PY3, 30 bytes_, 31 integer_types, 32 native_, 33 parse_qsl_text, 34 reraise, 35 text_type, 36 url_encode, 37 url_quote, 38 url_unquote, 39 quote_plus, 40 urlparse, 41 cgi_FieldStorage 42 ) 43 44 from webob.cookies import RequestCookies 45 46 from webob.descriptors import ( 47 CHARSET_RE, 48 SCHEME_RE, 49 converter, 50 converter_date, 51 environ_getter, 52 environ_decoder, 53 parse_auth, 54 parse_int, 55 parse_int_safe, 56 parse_range, 57 serialize_auth, 58 serialize_if_range, 59 serialize_int, 60 serialize_range, 61 upath_property, 62 deprecated_property, 63 ) 64 65 from webob.etag import ( 66 IfRange, 67 AnyETag, 68 NoETag, 69 etag_property, 70 ) 71 72 from webob.headers import EnvironHeaders 73 74 from webob.multidict import ( 75 NestedMultiDict, 76 MultiDict, 77 NoVars, 78 GetDict, 79 ) 80 81 from webob.util import warn_deprecation 82 83 __all__ = ['BaseRequest', 'Request', 'LegacyRequest'] 84 85 class _NoDefault: 86 def __repr__(self): 87 return '(No Default)' 88 NoDefault = _NoDefault() 89 90 PATH_SAFE = '/:@&+$,' 91 92 http_method_probably_has_body = dict.fromkeys( 93 ('GET', 'HEAD', 'DELETE', 'TRACE'), False) 94 http_method_probably_has_body.update( 95 dict.fromkeys(('POST', 'PUT', 'PATCH'), True)) 96 97 _LATIN_ENCODINGS = ( 98 'ascii', 'latin-1', 'latin', 'latin_1', 'l1', 'latin1', 99 'iso-8859-1', 'iso8859_1', 'iso_8859_1', 'iso8859', '8859', 100 ) 101 102 class BaseRequest(object): 103 ## The limit after which request bodies should be stored on disk 104 ## if they are read in (under this, and the request body is stored 105 ## in memory): 106 request_body_tempfile_limit = 10*1024 107 108 _charset = None 109 110 def __init__(self, environ, charset=None, unicode_errors=None, 111 decode_param_names=None, **kw): 112 113 if type(environ) is not dict: 114 raise TypeError( 115 "WSGI environ must be a dict; you passed %r" % (environ,)) 116 if unicode_errors is not None: 117 warnings.warn( 118 "You unicode_errors=%r to the Request constructor. Passing a " 119 "``unicode_errors`` value to the Request is no longer " 120 "supported in WebOb 1.2+. This value has been ignored " % ( 121 unicode_errors,), 122 DeprecationWarning 123 ) 124 if decode_param_names is not None: 125 warnings.warn( 126 "You passed decode_param_names=%r to the Request constructor. " 127 "Passing a ``decode_param_names`` value to the Request " 128 "is no longer supported in WebOb 1.2+. This value has " 129 "been ignored " % (decode_param_names,), 130 DeprecationWarning 131 ) 132 if not _is_utf8(charset): 133 raise DeprecationWarning( 134 "You passed charset=%r to the Request constructor. As of " 135 "WebOb 1.2, if your application needs a non-UTF-8 request " 136 "charset, please construct the request without a charset or " 137 "with a charset of 'None', then use ``req = " 138 "req.decode(charset)``" % charset 139 140 ) 141 d = self.__dict__ 142 d['environ'] = environ 143 if kw: 144 cls = self.__class__ 145 if 'method' in kw: 146 # set method first, because .body setters 147 # depend on it for checks 148 self.method = kw.pop('method') 149 for name, value in kw.items(): 150 if not hasattr(cls, name): 151 raise TypeError( 152 "Unexpected keyword: %s=%r" % (name, value)) 153 setattr(self, name, value) 154 155 if PY3: # pragma: no cover 156 def encget(self, key, default=NoDefault, encattr=None): 157 val = self.environ.get(key, default) 158 if val is NoDefault: 159 raise KeyError(key) 160 if val is default: 161 return default 162 if not encattr: 163 return val 164 encoding = getattr(self, encattr) 165 if encoding in _LATIN_ENCODINGS: # shortcut 166 return val 167 return bytes_(val, 'latin-1').decode(encoding) 168 else: 169 def encget(self, key, default=NoDefault, encattr=None): 170 val = self.environ.get(key, default) 171 if val is NoDefault: 172 raise KeyError(key) 173 if val is default: 174 return default 175 if encattr is None: 176 return val 177 encoding = getattr(self, encattr) 178 return val.decode(encoding) 179 180 def encset(self, key, val, encattr=None): 181 if encattr: 182 encoding = getattr(self, encattr) 183 else: 184 encoding = 'ascii' 185 if PY3: # pragma: no cover 186 self.environ[key] = bytes_(val, encoding).decode('latin-1') 187 else: 188 self.environ[key] = bytes_(val, encoding) 189 190 @property 191 def charset(self): 192 if self._charset is None: 193 charset = detect_charset(self._content_type_raw) 194 if _is_utf8(charset): 195 charset = 'UTF-8' 196 self._charset = charset 197 return self._charset 198 199 @charset.setter 200 def charset(self, charset): 201 if _is_utf8(charset): 202 charset = 'UTF-8' 203 if charset != self.charset: 204 raise DeprecationWarning("Use req = req.decode(%r)" % charset) 205 206 def decode(self, charset=None, errors='strict'): 207 charset = charset or self.charset 208 if charset == 'UTF-8': 209 return self 210 # cookies and path are always utf-8 211 t = Transcoder(charset, errors) 212 213 new_content_type = CHARSET_RE.sub('; charset="UTF-8"', 214 self._content_type_raw) 215 content_type = self.content_type 216 r = self.__class__( 217 self.environ.copy(), 218 query_string=t.transcode_query(self.query_string), 219 content_type=new_content_type, 220 ) 221 222 if content_type == 'application/x-www-form-urlencoded': 223 r.body = bytes_(t.transcode_query(native_(r.body))) 224 return r 225 elif content_type != 'multipart/form-data': 226 return r 227 228 fs_environ = self.environ.copy() 229 fs_environ.setdefault('CONTENT_LENGTH', '0') 230 fs_environ['QUERY_STRING'] = '' 231 if PY3: # pragma: no cover 232 fs = cgi_FieldStorage(fp=self.body_file, 233 environ=fs_environ, 234 keep_blank_values=True, 235 encoding=charset, 236 errors=errors) 237 else: 238 fs = cgi_FieldStorage(fp=self.body_file, 239 environ=fs_environ, 240 keep_blank_values=True) 241 242 243 fout = t.transcode_fs(fs, r._content_type_raw) 244 245 # this order is important, because setting body_file 246 # resets content_length 247 r.body_file = fout 248 r.content_length = fout.tell() 249 fout.seek(0) 250 return r 251 252 253 # this is necessary for correct warnings depth for both 254 # BaseRequest and Request (due to AdhocAttrMixin.__setattr__) 255 _setattr_stacklevel = 2 256 257 def _body_file__get(self): 258 """ 259 Input stream of the request (wsgi.input). 260 Setting this property resets the content_length and seekable flag 261 (unlike setting req.body_file_raw). 262 """ 263 if not self.is_body_readable: 264 return io.BytesIO() 265 r = self.body_file_raw 266 clen = self.content_length 267 if not self.is_body_seekable and clen is not None: 268 # we need to wrap input in LimitedLengthFile 269 # but we have to cache the instance as well 270 # otherwise this would stop working 271 # (.remaining counter would reset between calls): 272 # req.body_file.read(100) 273 # req.body_file.read(100) 274 env = self.environ 275 wrapped, raw = env.get('webob._body_file', (0,0)) 276 if raw is not r: 277 wrapped = LimitedLengthFile(r, clen) 278 wrapped = io.BufferedReader(wrapped) 279 env['webob._body_file'] = wrapped, r 280 r = wrapped 281 return r 282 283 def _body_file__set(self, value): 284 if isinstance(value, bytes): 285 warn_deprecation( 286 "Please use req.body = b'bytes' or req.body_file = fileobj", 287 '1.2', 288 self._setattr_stacklevel 289 ) 290 self.content_length = None 291 self.body_file_raw = value 292 self.is_body_seekable = False 293 self.is_body_readable = True 294 def _body_file__del(self): 295 self.body = b'' 296 body_file = property(_body_file__get, 297 _body_file__set, 298 _body_file__del, 299 doc=_body_file__get.__doc__) 300 body_file_raw = environ_getter('wsgi.input') 301 @property 302 def body_file_seekable(self): 303 """ 304 Get the body of the request (wsgi.input) as a seekable file-like 305 object. Middleware and routing applications should use this 306 attribute over .body_file. 307 308 If you access this value, CONTENT_LENGTH will also be updated. 309 """ 310 if not self.is_body_seekable: 311 self.make_body_seekable() 312 return self.body_file_raw 313 314 url_encoding = environ_getter('webob.url_encoding', 'UTF-8') 315 scheme = environ_getter('wsgi.url_scheme') 316 method = environ_getter('REQUEST_METHOD', 'GET') 317 http_version = environ_getter('SERVER_PROTOCOL') 318 content_length = converter( 319 environ_getter('CONTENT_LENGTH', None, '14.13'), 320 parse_int_safe, serialize_int, 'int') 321 remote_user = environ_getter('REMOTE_USER', None) 322 remote_addr = environ_getter('REMOTE_ADDR', None) 323 query_string = environ_getter('QUERY_STRING', '') 324 server_name = environ_getter('SERVER_NAME') 325 server_port = converter( 326 environ_getter('SERVER_PORT'), 327 parse_int, serialize_int, 'int') 328 329 script_name = environ_decoder('SCRIPT_NAME', '', encattr='url_encoding') 330 path_info = environ_decoder('PATH_INFO', encattr='url_encoding') 331 332 # bw compat 333 uscript_name = script_name 334 upath_info = path_info 335 336 _content_type_raw = environ_getter('CONTENT_TYPE', '') 337 338 def _content_type__get(self): 339 """Return the content type, but leaving off any parameters (like 340 charset, but also things like the type in ``application/atom+xml; 341 type=entry``) 342 343 If you set this property, you can include parameters, or if 344 you don't include any parameters in the value then existing 345 parameters will be preserved. 346 """ 347 return self._content_type_raw.split(';', 1)[0] 348 def _content_type__set(self, value=None): 349 if value is not None: 350 value = str(value) 351 if ';' not in value: 352 content_type = self._content_type_raw 353 if ';' in content_type: 354 value += ';' + content_type.split(';', 1)[1] 355 self._content_type_raw = value 356 357 content_type = property(_content_type__get, 358 _content_type__set, 359 _content_type__set, 360 _content_type__get.__doc__) 361 362 _headers = None 363 364 def _headers__get(self): 365 """ 366 All the request headers as a case-insensitive dictionary-like 367 object. 368 """ 369 if self._headers is None: 370 self._headers = EnvironHeaders(self.environ) 371 return self._headers 372 373 def _headers__set(self, value): 374 self.headers.clear() 375 self.headers.update(value) 376 377 headers = property(_headers__get, _headers__set, doc=_headers__get.__doc__) 378 379 @property 380 def client_addr(self): 381 """ 382 The effective client IP address as a string. If the 383 ``HTTP_X_FORWARDED_FOR`` header exists in the WSGI environ, this 384 attribute returns the client IP address present in that header 385 (e.g. if the header value is ``192.168.1.1, 192.168.1.2``, the value 386 will be ``192.168.1.1``). If no ``HTTP_X_FORWARDED_FOR`` header is 387 present in the environ at all, this attribute will return the value 388 of the ``REMOTE_ADDR`` header. If the ``REMOTE_ADDR`` header is 389 unset, this attribute will return the value ``None``. 390 391 .. warning:: 392 393 It is possible for user agents to put someone else's IP or just 394 any string in ``HTTP_X_FORWARDED_FOR`` as it is a normal HTTP 395 header. Forward proxies can also provide incorrect values (private 396 IP addresses etc). You cannot "blindly" trust the result of this 397 method to provide you with valid data unless you're certain that 398 ``HTTP_X_FORWARDED_FOR`` has the correct values. The WSGI server 399 must be behind a trusted proxy for this to be true. 400 """ 401 e = self.environ 402 xff = e.get('HTTP_X_FORWARDED_FOR') 403 if xff is not None: 404 addr = xff.split(',')[0].strip() 405 else: 406 addr = e.get('REMOTE_ADDR') 407 return addr 408 409 @property 410 def host_port(self): 411 """ 412 The effective server port number as a string. If the ``HTTP_HOST`` 413 header exists in the WSGI environ, this attribute returns the port 414 number present in that header. If the ``HTTP_HOST`` header exists but 415 contains no explicit port number: if the WSGI url scheme is "https" , 416 this attribute returns "443", if the WSGI url scheme is "http", this 417 attribute returns "80" . If no ``HTTP_HOST`` header is present in 418 the environ at all, this attribute will return the value of the 419 ``SERVER_PORT`` header (which is guaranteed to be present). 420 """ 421 e = self.environ 422 host = e.get('HTTP_HOST') 423 if host is not None: 424 if ':' in host: 425 host, port = host.split(':', 1) 426 else: 427 url_scheme = e['wsgi.url_scheme'] 428 if url_scheme == 'https': 429 port = '443' 430 else: 431 port = '80' 432 else: 433 port = e['SERVER_PORT'] 434 return port 435 436 @property 437 def host_url(self): 438 """ 439 The URL through the host (no path) 440 """ 441 e = self.environ 442 scheme = e.get('wsgi.url_scheme') 443 url = scheme + '://' 444 host = e.get('HTTP_HOST') 445 if host is not None: 446 if ':' in host: 447 host, port = host.split(':', 1) 448 else: 449 port = None 450 else: 451 host = e.get('SERVER_NAME') 452 port = e.get('SERVER_PORT') 453 if scheme == 'https': 454 if port == '443': 455 port = None 456 elif scheme == 'http': 457 if port == '80': 458 port = None 459 url += host 460 if port: 461 url += ':%s' % port 462 return url 463 464 @property 465 def application_url(self): 466 """ 467 The URL including SCRIPT_NAME (no PATH_INFO or query string) 468 """ 469 bscript_name = bytes_(self.script_name, self.url_encoding) 470 return self.host_url + url_quote(bscript_name, PATH_SAFE) 471 472 @property 473 def path_url(self): 474 """ 475 The URL including SCRIPT_NAME and PATH_INFO, but not QUERY_STRING 476 """ 477 bpath_info = bytes_(self.path_info, self.url_encoding) 478 return self.application_url + url_quote(bpath_info, PATH_SAFE) 479 480 @property 481 def path(self): 482 """ 483 The path of the request, without host or query string 484 """ 485 bscript = bytes_(self.script_name, self.url_encoding) 486 bpath = bytes_(self.path_info, self.url_encoding) 487 return url_quote(bscript, PATH_SAFE) + url_quote(bpath, PATH_SAFE) 488 489 @property 490 def path_qs(self): 491 """ 492 The path of the request, without host but with query string 493 """ 494 path = self.path 495 qs = self.environ.get('QUERY_STRING') 496 if qs: 497 path += '?' + qs 498 return path 499 500 @property 501 def url(self): 502 """ 503 The full request URL, including QUERY_STRING 504 """ 505 url = self.path_url 506 qs = self.environ.get('QUERY_STRING') 507 if qs: 508 url += '?' + qs 509 return url 510 511 def relative_url(self, other_url, to_application=False): 512 """ 513 Resolve other_url relative to the request URL. 514 515 If ``to_application`` is True, then resolve it relative to the 516 URL with only SCRIPT_NAME 517 """ 518 if to_application: 519 url = self.application_url 520 if not url.endswith('/'): 521 url += '/' 522 else: 523 url = self.path_url 524 return urlparse.urljoin(url, other_url) 525 526 def path_info_pop(self, pattern=None): 527 """ 528 'Pops' off the next segment of PATH_INFO, pushing it onto 529 SCRIPT_NAME, and returning the popped segment. Returns None if 530 there is nothing left on PATH_INFO. 531 532 Does not return ``''`` when there's an empty segment (like 533 ``/path//path``); these segments are just ignored. 534 535 Optional ``pattern`` argument is a regexp to match the return value 536 before returning. If there is no match, no changes are made to the 537 request and None is returned. 538 """ 539 path = self.path_info 540 if not path: 541 return None 542 slashes = '' 543 while path.startswith('/'): 544 slashes += '/' 545 path = path[1:] 546 idx = path.find('/') 547 if idx == -1: 548 idx = len(path) 549 r = path[:idx] 550 if pattern is None or re.match(pattern, r): 551 self.script_name += slashes + r 552 self.path_info = path[idx:] 553 return r 554 555 def path_info_peek(self): 556 """ 557 Returns the next segment on PATH_INFO, or None if there is no 558 next segment. Doesn't modify the environment. 559 """ 560 path = self.path_info 561 if not path: 562 return None 563 path = path.lstrip('/') 564 return path.split('/', 1)[0] 565 566 def _urlvars__get(self): 567 """ 568 Return any *named* variables matched in the URL. 569 570 Takes values from ``environ['wsgiorg.routing_args']``. 571 Systems like ``routes`` set this value. 572 """ 573 if 'paste.urlvars' in self.environ: 574 return self.environ['paste.urlvars'] 575 elif 'wsgiorg.routing_args' in self.environ: 576 return self.environ['wsgiorg.routing_args'][1] 577 else: 578 result = {} 579 self.environ['wsgiorg.routing_args'] = ((), result) 580 return result 581 582 def _urlvars__set(self, value): 583 environ = self.environ 584 if 'wsgiorg.routing_args' in environ: 585 environ['wsgiorg.routing_args'] = ( 586 environ['wsgiorg.routing_args'][0], value) 587 if 'paste.urlvars' in environ: 588 del environ['paste.urlvars'] 589 elif 'paste.urlvars' in environ: 590 environ['paste.urlvars'] = value 591 else: 592 environ['wsgiorg.routing_args'] = ((), value) 593 594 def _urlvars__del(self): 595 if 'paste.urlvars' in self.environ: 596 del self.environ['paste.urlvars'] 597 if 'wsgiorg.routing_args' in self.environ: 598 if not self.environ['wsgiorg.routing_args'][0]: 599 del self.environ['wsgiorg.routing_args'] 600 else: 601 self.environ['wsgiorg.routing_args'] = ( 602 self.environ['wsgiorg.routing_args'][0], {}) 603 604 urlvars = property(_urlvars__get, 605 _urlvars__set, 606 _urlvars__del, 607 doc=_urlvars__get.__doc__) 608 609 def _urlargs__get(self): 610 """ 611 Return any *positional* variables matched in the URL. 612 613 Takes values from ``environ['wsgiorg.routing_args']``. 614 Systems like ``routes`` set this value. 615 """ 616 if 'wsgiorg.routing_args' in self.environ: 617 return self.environ['wsgiorg.routing_args'][0] 618 else: 619 # Since you can't update this value in-place, we don't need 620 # to set the key in the environment 621 return () 622 623 def _urlargs__set(self, value): 624 environ = self.environ 625 if 'paste.urlvars' in environ: 626 # Some overlap between this and wsgiorg.routing_args; we need 627 # wsgiorg.routing_args to make this work 628 routing_args = (value, environ.pop('paste.urlvars')) 629 elif 'wsgiorg.routing_args' in environ: 630 routing_args = (value, environ['wsgiorg.routing_args'][1]) 631 else: 632 routing_args = (value, {}) 633 environ['wsgiorg.routing_args'] = routing_args 634 635 def _urlargs__del(self): 636 if 'wsgiorg.routing_args' in self.environ: 637 if not self.environ['wsgiorg.routing_args'][1]: 638 del self.environ['wsgiorg.routing_args'] 639 else: 640 self.environ['wsgiorg.routing_args'] = ( 641 (), self.environ['wsgiorg.routing_args'][1]) 642 643 urlargs = property(_urlargs__get, 644 _urlargs__set, 645 _urlargs__del, 646 _urlargs__get.__doc__) 647 648 @property 649 def is_xhr(self): 650 """Is X-Requested-With header present and equal to ``XMLHttpRequest``? 651 652 Note: this isn't set by every XMLHttpRequest request, it is 653 only set if you are using a Javascript library that sets it 654 (or you set the header yourself manually). Currently 655 Prototype and jQuery are known to set this header.""" 656 return self.environ.get('HTTP_X_REQUESTED_WITH', '') == 'XMLHttpRequest' 657 658 def _host__get(self): 659 """Host name provided in HTTP_HOST, with fall-back to SERVER_NAME""" 660 if 'HTTP_HOST' in self.environ: 661 return self.environ['HTTP_HOST'] 662 else: 663 return '%(SERVER_NAME)s:%(SERVER_PORT)s' % self.environ 664 def _host__set(self, value): 665 self.environ['HTTP_HOST'] = value 666 def _host__del(self): 667 if 'HTTP_HOST' in self.environ: 668 del self.environ['HTTP_HOST'] 669 host = property(_host__get, _host__set, _host__del, doc=_host__get.__doc__) 670 671 @property 672 def domain(self): 673 """ Returns the domain portion of the host value. Equivalent to: 674 675 .. code-block:: python 676 677 domain = request.host 678 if ':' in domain: 679 domain = domain.split(':', 1)[0] 680 681 This will be equivalent to the domain portion of the ``HTTP_HOST`` 682 value in the environment if it exists, or the ``SERVER_NAME`` value in 683 the environment if it doesn't. For example, if the environment 684 contains an ``HTTP_HOST`` value of ``foo.example.com:8000``, 685 ``request.domain`` will return ``foo.example.com``. 686 687 Note that this value cannot be *set* on the request. To set the host 688 value use :meth:`webob.request.Request.host` instead. 689 """ 690 domain = self.host 691 if ':' in domain: 692 domain = domain.split(':', 1)[0] 693 return domain 694 695 def _body__get(self): 696 """ 697 Return the content of the request body. 698 """ 699 if not self.is_body_readable: 700 return b'' 701 self.make_body_seekable() # we need this to have content_length 702 r = self.body_file.read(self.content_length) 703 self.body_file_raw.seek(0) 704 return r 705 def _body__set(self, value): 706 if value is None: 707 value = b'' 708 if not isinstance(value, bytes): 709 raise TypeError("You can only set Request.body to bytes (not %r)" 710 % type(value)) 711 if not http_method_probably_has_body.get(self.method, True): 712 if not value: 713 self.content_length = None 714 self.body_file_raw = io.BytesIO() 715 return 716 self.content_length = len(value) 717 self.body_file_raw = io.BytesIO(value) 718 self.is_body_seekable = True 719 def _body__del(self): 720 self.body = b'' 721 body = property(_body__get, _body__set, _body__del, doc=_body__get.__doc__) 722 723 def _json_body__get(self): 724 """Access the body of the request as JSON""" 725 return json.loads(self.body.decode(self.charset)) 726 727 def _json_body__set(self, value): 728 self.body = json.dumps(value, separators=(',', ':')).encode(self.charset) 729 730 def _json_body__del(self): 731 del self.body 732 733 json = json_body = property(_json_body__get, _json_body__set, _json_body__del) 734 735 def _text__get(self): 736 """ 737 Get/set the text value of the body 738 """ 739 if not self.charset: 740 raise AttributeError( 741 "You cannot access Request.text unless charset is set") 742 body = self.body 743 return body.decode(self.charset) 744 745 def _text__set(self, value): 746 if not self.charset: 747 raise AttributeError( 748 "You cannot access Response.text unless charset is set") 749 if not isinstance(value, text_type): 750 raise TypeError( 751 "You can only set Request.text to a unicode string " 752 "(not %s)" % type(value)) 753 self.body = value.encode(self.charset) 754 755 def _text__del(self): 756 del self.body 757 758 text = property(_text__get, _text__set, _text__del, doc=_text__get.__doc__) 759 760 761 @property 762 def POST(self): 763 """ 764 Return a MultiDict containing all the variables from a form 765 request. Returns an empty dict-like object for non-form requests. 766 767 Form requests are typically POST requests, however PUT & PATCH requests 768 with an appropriate Content-Type are also supported. 769 """ 770 env = self.environ 771 if self.method not in ('POST', 'PUT', 'PATCH'): 772 return NoVars('Not a form request') 773 if 'webob._parsed_post_vars' in env: 774 vars, body_file = env['webob._parsed_post_vars'] 775 if body_file is self.body_file_raw: 776 return vars 777 content_type = self.content_type 778 if ((self.method == 'PUT' and not content_type) 779 or content_type not in 780 ('', 781 'application/x-www-form-urlencoded', 782 'multipart/form-data') 783 ): 784 # Not an HTML form submission 785 return NoVars('Not an HTML form submission (Content-Type: %s)' 786 % content_type) 787 self._check_charset() 788 789 self.make_body_seekable() 790 self.body_file_raw.seek(0) 791 792 fs_environ = env.copy() 793 # FieldStorage assumes a missing CONTENT_LENGTH, but a 794 # default of 0 is better: 795 fs_environ.setdefault('CONTENT_LENGTH', '0') 796 fs_environ['QUERY_STRING'] = '' 797 if PY3: # pragma: no cover 798 fs = cgi_FieldStorage( 799 fp=self.body_file, 800 environ=fs_environ, 801 keep_blank_values=True, 802 encoding='utf8') 803 vars = MultiDict.from_fieldstorage(fs) 804 else: 805 fs = cgi_FieldStorage( 806 fp=self.body_file, 807 environ=fs_environ, 808 keep_blank_values=True) 809 vars = MultiDict.from_fieldstorage(fs) 810 811 env['webob._parsed_post_vars'] = (vars, self.body_file_raw) 812 return vars 813 814 @property 815 def GET(self): 816 """ 817 Return a MultiDict containing all the variables from the 818 QUERY_STRING. 819 """ 820 env = self.environ 821 source = env.get('QUERY_STRING', '') 822 if 'webob._parsed_query_vars' in env: 823 vars, qs = env['webob._parsed_query_vars'] 824 if qs == source: 825 return vars 826 827 data = [] 828 if source: 829 # this is disabled because we want to access req.GET 830 # for text/plain; charset=ascii uploads for example 831 #self._check_charset() 832 data = parse_qsl_text(source) 833 #d = lambda b: b.decode('utf8') 834 #data = [(d(k), d(v)) for k,v in data] 835 vars = GetDict(data, env) 836 env['webob._parsed_query_vars'] = (vars, source) 837 return vars 838 839 def _check_charset(self): 840 if self.charset != 'UTF-8': 841 raise DeprecationWarning( 842 "Requests are expected to be submitted in UTF-8, not %s. " 843 "You can fix this by doing req = req.decode('%s')" % ( 844 self.charset, self.charset) 845 ) 846 847 @property 848 def params(self): 849 """ 850 A dictionary-like object containing both the parameters from 851 the query string and request body. 852 """ 853 params = NestedMultiDict(self.GET, self.POST) 854 return params 855 856 857 @property 858 def cookies(self): 859 """ 860 Return a dictionary of cookies as found in the request. 861 """ 862 return RequestCookies(self.environ) 863 864 @cookies.setter 865 def cookies(self, val): 866 self.environ.pop('HTTP_COOKIE', None) 867 r = RequestCookies(self.environ) 868 r.update(val) 869 870 def copy(self): 871 """ 872 Copy the request and environment object. 873 874 This only does a shallow copy, except of wsgi.input 875 """ 876 self.make_body_seekable() 877 env = self.environ.copy() 878 new_req = self.__class__(env) 879 new_req.copy_body() 880 return new_req 881 882 def copy_get(self): 883 """ 884 Copies the request and environment object, but turning this request 885 into a GET along the way. If this was a POST request (or any other 886 verb) then it becomes GET, and the request body is thrown away. 887 """ 888 env = self.environ.copy() 889 return self.__class__(env, method='GET', content_type=None, 890 body=b'') 891 892 # webob.is_body_seekable marks input streams that are seekable 893 # this way we can have seekable input without testing the .seek() method 894 is_body_seekable = environ_getter('webob.is_body_seekable', False) 895 896 #is_body_readable = environ_getter('webob.is_body_readable', False) 897 898 def _is_body_readable__get(self): 899 """ 900 webob.is_body_readable is a flag that tells us 901 that we can read the input stream even though 902 CONTENT_LENGTH is missing. This allows FakeCGIBody 903 to work and can be used by servers to support 904 chunked encoding in requests. 905 For background see https://bitbucket.org/ianb/webob/issue/6 906 """ 907 if http_method_probably_has_body.get(self.method): 908 # known HTTP method with body 909 return True 910 elif self.content_length is not None: 911 # unknown HTTP method, but the Content-Length 912 # header is present 913 return True 914 else: 915 # last resort -- rely on the special flag 916 return self.environ.get('webob.is_body_readable', False) 917 918 def _is_body_readable__set(self, flag): 919 self.environ['webob.is_body_readable'] = bool(flag) 920 921 is_body_readable = property(_is_body_readable__get, _is_body_readable__set, 922 doc=_is_body_readable__get.__doc__ 923 ) 924 925 926 927 def make_body_seekable(self): 928 """ 929 This forces ``environ['wsgi.input']`` to be seekable. 930 That means that, the content is copied into a BytesIO or temporary 931 file and flagged as seekable, so that it will not be unnecessarily 932 copied again. 933 934 After calling this method the .body_file is always seeked to the 935 start of file and .content_length is not None. 936 937 The choice to copy to BytesIO is made from 938 ``self.request_body_tempfile_limit`` 939 """ 940 if self.is_body_seekable: 941 self.body_file_raw.seek(0) 942 else: 943 self.copy_body() 944 945 946 def copy_body(self): 947 """ 948 Copies the body, in cases where it might be shared with 949 another request object and that is not desired. 950 951 This copies the body in-place, either into a BytesIO object 952 or a temporary file. 953 """ 954 if not self.is_body_readable: 955 # there's no body to copy 956 self.body = b'' 957 elif self.content_length is None: 958 # chunked body or FakeCGIBody 959 self.body = self.body_file_raw.read() 960 self._copy_body_tempfile() 961 else: 962 # try to read body into tempfile 963 did_copy = self._copy_body_tempfile() 964 if not did_copy: 965 # it wasn't necessary, so just read it into memory 966 self.body = self.body_file.read(self.content_length) 967 968 def _copy_body_tempfile(self): 969 """ 970 Copy wsgi.input to tempfile if necessary. Returns True if it did. 971 """ 972 tempfile_limit = self.request_body_tempfile_limit 973 todo = self.content_length 974 assert isinstance(todo, integer_types), todo 975 if not tempfile_limit or todo <= tempfile_limit: 976 return False 977 fileobj = self.make_tempfile() 978 input = self.body_file 979 while todo > 0: 980 data = input.read(min(todo, 65536)) 981 if not data: 982 # Normally this should not happen, because LimitedLengthFile 983 # should have raised an exception by now. 984 # It can happen if the is_body_seekable flag is incorrect. 985 raise DisconnectionError( 986 "Client disconnected (%s more bytes were expected)" 987 % todo 988 ) 989 fileobj.write(data) 990 todo -= len(data) 991 fileobj.seek(0) 992 self.body_file_raw = fileobj 993 self.is_body_seekable = True 994 return True 995 996 def make_tempfile(self): 997 """ 998 Create a tempfile to store big request body. 999 This API is not stable yet. A 'size' argument might be added. 1000 """ 1001 return tempfile.TemporaryFile() 1002 1003 1004 def remove_conditional_headers(self, 1005 remove_encoding=True, 1006 remove_range=True, 1007 remove_match=True, 1008 remove_modified=True): 1009 """ 1010 Remove headers that make the request conditional. 1011 1012 These headers can cause the response to be 304 Not Modified, 1013 which in some cases you may not want to be possible. 1014 1015 This does not remove headers like If-Match, which are used for 1016 conflict detection. 1017 """ 1018 check_keys = [] 1019 if remove_range: 1020 check_keys += ['HTTP_IF_RANGE', 'HTTP_RANGE'] 1021 if remove_match: 1022 check_keys.append('HTTP_IF_NONE_MATCH') 1023 if remove_modified: 1024 check_keys.append('HTTP_IF_MODIFIED_SINCE') 1025 if remove_encoding: 1026 check_keys.append('HTTP_ACCEPT_ENCODING') 1027 1028 for key in check_keys: 1029 if key in self.environ: 1030 del self.environ[key] 1031 1032 1033 accept = accept_property('Accept', '14.1', MIMEAccept, MIMENilAccept) 1034 accept_charset = accept_property('Accept-Charset', '14.2', AcceptCharset) 1035 accept_encoding = accept_property('Accept-Encoding', '14.3', 1036 NilClass=NoAccept) 1037 accept_language = accept_property('Accept-Language', '14.4', AcceptLanguage) 1038 1039 authorization = converter( 1040 environ_getter('HTTP_AUTHORIZATION', None, '14.8'), 1041 parse_auth, serialize_auth, 1042 ) 1043 1044 1045 def _cache_control__get(self): 1046 """ 1047 Get/set/modify the Cache-Control header (`HTTP spec section 14.9 1048 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.9>`_) 1049 """ 1050 env = self.environ 1051 value = env.get('HTTP_CACHE_CONTROL', '') 1052 cache_header, cache_obj = env.get('webob._cache_control', (None, None)) 1053 if cache_obj is not None and cache_header == value: 1054 return cache_obj 1055 cache_obj = CacheControl.parse(value, 1056 updates_to=self._update_cache_control, 1057 type='request') 1058 env['webob._cache_control'] = (value, cache_obj) 1059 return cache_obj 1060 1061 def _cache_control__set(self, value): 1062 env = self.environ 1063 value = value or '' 1064 if isinstance(value, dict): 1065 value = CacheControl(value, type='request') 1066 if isinstance(value, CacheControl): 1067 str_value = str(value) 1068 env['HTTP_CACHE_CONTROL'] = str_value 1069 env['webob._cache_control'] = (str_value, value) 1070 else: 1071 env['HTTP_CACHE_CONTROL'] = str(value) 1072 env['webob._cache_control'] = (None, None) 1073 1074 def _cache_control__del(self): 1075 env = self.environ 1076 if 'HTTP_CACHE_CONTROL' in env: 1077 del env['HTTP_CACHE_CONTROL'] 1078 if 'webob._cache_control' in env: 1079 del env['webob._cache_control'] 1080 1081 def _update_cache_control(self, prop_dict): 1082 self.environ['HTTP_CACHE_CONTROL'] = serialize_cache_control(prop_dict) 1083 1084 cache_control = property(_cache_control__get, 1085 _cache_control__set, 1086 _cache_control__del, 1087 doc=_cache_control__get.__doc__) 1088 1089 1090 if_match = etag_property('HTTP_IF_MATCH', AnyETag, '14.24') 1091 if_none_match = etag_property('HTTP_IF_NONE_MATCH', NoETag, '14.26', 1092 strong=False) 1093 1094 date = converter_date(environ_getter('HTTP_DATE', None, '14.8')) 1095 if_modified_since = converter_date( 1096 environ_getter('HTTP_IF_MODIFIED_SINCE', None, '14.25')) 1097 if_unmodified_since = converter_date( 1098 environ_getter('HTTP_IF_UNMODIFIED_SINCE', None, '14.28')) 1099 if_range = converter( 1100 environ_getter('HTTP_IF_RANGE', None, '14.27'), 1101 IfRange.parse, serialize_if_range, 'IfRange object') 1102 1103 1104 max_forwards = converter( 1105 environ_getter('HTTP_MAX_FORWARDS', None, '14.31'), 1106 parse_int, serialize_int, 'int') 1107 1108 pragma = environ_getter('HTTP_PRAGMA', None, '14.32') 1109 1110 range = converter( 1111 environ_getter('HTTP_RANGE', None, '14.35'), 1112 parse_range, serialize_range, 'Range object') 1113 1114 referer = environ_getter('HTTP_REFERER', None, '14.36') 1115 referrer = referer 1116 1117 user_agent = environ_getter('HTTP_USER_AGENT', None, '14.43') 1118 1119 def __repr__(self): 1120 try: 1121 name = '%s %s' % (self.method, self.url) 1122 except KeyError: 1123 name = '(invalid WSGI environ)' 1124 msg = '<%s at 0x%x %s>' % ( 1125 self.__class__.__name__, 1126 abs(id(self)), name) 1127 return msg 1128 1129 def as_bytes(self, skip_body=False): 1130 """ 1131 Return HTTP bytes representing this request. 1132 If skip_body is True, exclude the body. 1133 If skip_body is an integer larger than one, skip body 1134 only if its length is bigger than that number. 1135 """ 1136 url = self.url 1137 host = self.host_url 1138 assert url.startswith(host) 1139 url = url[len(host):] 1140 parts = [bytes_('%s %s %s' % (self.method, url, self.http_version))] 1141 #self.headers.setdefault('Host', self.host) 1142 1143 # acquire body before we handle headers so that 1144 # content-length will be set 1145 body = None 1146 if http_method_probably_has_body.get(self.method): 1147 if skip_body > 1: 1148 if len(self.body) > skip_body: 1149 body = bytes_('<body skipped (len=%s)>' % len(self.body)) 1150 else: 1151 skip_body = False 1152 if not skip_body: 1153 body = self.body 1154 1155 for k, v in sorted(self.headers.items()): 1156 header = bytes_('%s: %s' % (k, v)) 1157 parts.append(header) 1158 1159 if body: 1160 parts.extend([b'', body]) 1161 # HTTP clearly specifies CRLF 1162 return b'\r\n'.join(parts) 1163 1164 def as_string(self, skip_body=False): 1165 # TODO: Remove in 1.4 1166 warn_deprecation( 1167 "Please use req.as_bytes", 1168 '1.3', 1169 self._setattr_stacklevel 1170 ) 1171 1172 def as_text(self): 1173 bytes = self.as_bytes() 1174 return bytes.decode(self.charset) 1175 1176 __str__ = as_text 1177 1178 @classmethod 1179 def from_bytes(cls, b): 1180 """ 1181 Create a request from HTTP bytes data. If the bytes contain 1182 extra data after the request, raise a ValueError. 1183 """ 1184 f = io.BytesIO(b) 1185 r = cls.from_file(f) 1186 if f.tell() != len(b): 1187 raise ValueError("The string contains more data than expected") 1188 return r 1189 1190 @classmethod 1191 def from_string(cls, b): 1192 # TODO: Remove in 1.4 1193 warn_deprecation( 1194 "Please use req.from_bytes", 1195 '1.3', 1196 cls._setattr_stacklevel 1197 ) 1198 1199 @classmethod 1200 def from_text(cls, s): 1201 b = bytes_(s, 'utf-8') 1202 return cls.from_bytes(b) 1203 1204 @classmethod 1205 def from_file(cls, fp): 1206 """Read a request from a file-like object (it must implement 1207 ``.read(size)`` and ``.readline()``). 1208 1209 It will read up to the end of the request, not the end of the 1210 file (unless the request is a POST or PUT and has no 1211 Content-Length, in that case, the entire file is read). 1212 1213 This reads the request as represented by ``str(req)``; it may 1214 not read every valid HTTP request properly. 1215 """ 1216 start_line = fp.readline() 1217 is_text = isinstance(start_line, text_type) 1218 if is_text: 1219 crlf = '\r\n' 1220 colon = ':' 1221 else: 1222 crlf = b'\r\n' 1223 colon = b':' 1224 try: 1225 header = start_line.rstrip(crlf) 1226 method, resource, http_version = header.split(None, 2) 1227 method = native_(method, 'utf-8') 1228 resource = native_(resource, 'utf-8') 1229 http_version = native_(http_version, 'utf-8') 1230 except ValueError: 1231 raise ValueError('Bad HTTP request line: %r' % start_line) 1232 r = cls(environ_from_url(resource), 1233 http_version=http_version, 1234 method=method.upper() 1235 ) 1236 del r.environ['HTTP_HOST'] 1237 while 1: 1238 line = fp.readline() 1239 if not line.strip(): 1240 # end of headers 1241 break 1242 hname, hval = line.split(colon, 1) 1243 hname = native_(hname, 'utf-8') 1244 hval = native_(hval, 'utf-8').strip() 1245 if hname in r.headers: 1246 hval = r.headers[hname] + ', ' + hval 1247 r.headers[hname] = hval 1248 if http_method_probably_has_body.get(r.method): 1249 clen = r.content_length 1250 if clen is None: 1251 body = fp.read() 1252 else: 1253 body = fp.read(clen) 1254 if is_text: 1255 body = bytes_(body, 'utf-8') 1256 r.body = body 1257 return r 1258 1259 def call_application(self, application, catch_exc_info=False): 1260 """ 1261 Call the given WSGI application, returning ``(status_string, 1262 headerlist, app_iter)`` 1263 1264 Be sure to call ``app_iter.close()`` if it's there. 1265 1266 If catch_exc_info is true, then returns ``(status_string, 1267 headerlist, app_iter, exc_info)``, where the fourth item may 1268 be None, but won't be if there was an exception. If you don't 1269 do this and there was an exception, the exception will be 1270 raised directly. 1271 """ 1272 if self.is_body_seekable: 1273 self.body_file_raw.seek(0) 1274 captured = [] 1275 output = [] 1276 def start_response(status, headers, exc_info=None): 1277 if exc_info is not None and not catch_exc_info: 1278 reraise(exc_info) 1279 captured[:] = [status, headers, exc_info] 1280 return output.append 1281 app_iter = application(self.environ, start_response) 1282 if output or not captured: 1283 try: 1284 output.extend(app_iter) 1285 finally: 1286 if hasattr(app_iter, 'close'): 1287 app_iter.close() 1288 app_iter = output 1289 if catch_exc_info: 1290 return (captured[0], captured[1], app_iter, captured[2]) 1291 else: 1292 return (captured[0], captured[1], app_iter) 1293 1294 # Will be filled in later: 1295 ResponseClass = None 1296 1297 def send(self, application=None, catch_exc_info=False): 1298 """ 1299 Like ``.call_application(application)``, except returns a 1300 response object with ``.status``, ``.headers``, and ``.body`` 1301 attributes. 1302 1303 This will use ``self.ResponseClass`` to figure out the class 1304 of the response object to return. 1305 1306 If ``application`` is not given, this will send the request to 1307 ``self.make_default_send_app()`` 1308 """ 1309 if application is None: 1310 application = self.make_default_send_app() 1311 if catch_exc_info: 1312 status, headers, app_iter, exc_info = self.call_application( 1313 application, catch_exc_info=True) 1314 del exc_info 1315 else: 1316 status, headers, app_iter = self.call_application( 1317 application, catch_exc_info=False) 1318 return self.ResponseClass( 1319 status=status, headerlist=list(headers), app_iter=app_iter) 1320 1321 get_response = send 1322 1323 def make_default_send_app(self): 1324 global _client 1325 try: 1326 client = _client 1327 except NameError: 1328 from webob import client 1329 _client = client 1330 return client.send_request_app 1331 1332 @classmethod 1333 def blank(cls, path, environ=None, base_url=None, 1334 headers=None, POST=None, **kw): 1335 """ 1336 Create a blank request environ (and Request wrapper) with the 1337 given path (path should be urlencoded), and any keys from 1338 environ. 1339 1340 The path will become path_info, with any query string split 1341 off and used. 1342 1343 All necessary keys will be added to the environ, but the 1344 values you pass in will take precedence. If you pass in 1345 base_url then wsgi.url_scheme, HTTP_HOST, and SCRIPT_NAME will 1346 be filled in from that value. 1347 1348 Any extra keyword will be passed to ``__init__``. 1349 """ 1350 env = environ_from_url(path) 1351 if base_url: 1352 scheme, netloc, path, query, fragment = urlparse.urlsplit(base_url) 1353 if query or fragment: 1354 raise ValueError( 1355 "base_url (%r) cannot have a query or fragment" 1356 % base_url) 1357 if scheme: 1358 env['wsgi.url_scheme'] = scheme 1359 if netloc: 1360 if ':' not in netloc: 1361 if scheme == 'http': 1362 netloc += ':80' 1363 elif scheme == 'https': 1364 netloc += ':443' 1365 else: 1366 raise ValueError( 1367 "Unknown scheme: %r" % scheme) 1368 host, port = netloc.split(':', 1) 1369 env['SERVER_PORT'] = port 1370 env['SERVER_NAME'] = host 1371 env['HTTP_HOST'] = netloc 1372 if path: 1373 env['SCRIPT_NAME'] = url_unquote(path) 1374 if environ: 1375 env.update(environ) 1376 content_type = kw.get('content_type', env.get('CONTENT_TYPE')) 1377 if headers and 'Content-Type' in headers: 1378 content_type = headers['Content-Type'] 1379 if content_type is not None: 1380 kw['content_type'] = content_type 1381 environ_add_POST(env, POST, content_type=content_type) 1382 obj = cls(env, **kw) 1383 if headers is not None: 1384 obj.headers.update(headers) 1385 return obj 1386 1387 class LegacyRequest(BaseRequest): 1388 uscript_name = upath_property('SCRIPT_NAME') 1389 upath_info = upath_property('PATH_INFO') 1390 1391 def encget(self, key, default=NoDefault, encattr=None): 1392 val = self.environ.get(key, default) 1393 if val is NoDefault: 1394 raise KeyError(key) 1395 if val is default: 1396 return default 1397 return val 1398 1399 class AdhocAttrMixin(object): 1400 _setattr_stacklevel = 3 1401 1402 def __setattr__(self, attr, value, DEFAULT=object()): 1403 if (getattr(self.__class__, attr, DEFAULT) is not DEFAULT or 1404 attr.startswith('_')): 1405 object.__setattr__(self, attr, value) 1406 else: 1407 self.environ.setdefault('webob.adhoc_attrs', {})[attr] = value 1408 1409 def __getattr__(self, attr, DEFAULT=object()): 1410 try: 1411 return self.environ['webob.adhoc_attrs'][attr] 1412 except KeyError: 1413 raise AttributeError(attr) 1414 1415 def __delattr__(self, attr, DEFAULT=object()): 1416 if getattr(self.__class__, attr, DEFAULT) is not DEFAULT: 1417 return object.__delattr__(self, attr) 1418 try: 1419 del self.environ['webob.adhoc_attrs'][attr] 1420 except KeyError: 1421 raise AttributeError(attr) 1422 1423 class Request(AdhocAttrMixin, BaseRequest): 1424 """ The default request implementation """ 1425 1426 def environ_from_url(path): 1427 if SCHEME_RE.search(path): 1428 scheme, netloc, path, qs, fragment = urlparse.urlsplit(path) 1429 if fragment: 1430 raise TypeError("Path cannot contain a fragment (%r)" % fragment) 1431 if qs: 1432 path += '?' + qs 1433 if ':' not in netloc: 1434 if scheme == 'http': 1435 netloc += ':80' 1436 elif scheme == 'https': 1437 netloc += ':443' 1438 else: 1439 raise TypeError("Unknown scheme: %r" % scheme) 1440 else: 1441 scheme = 'http' 1442 netloc = 'localhost:80' 1443 if path and '?' in path: 1444 path_info, query_string = path.split('?', 1) 1445 path_info = url_unquote(path_info) 1446 else: 1447 path_info = url_unquote(path) 1448 query_string = '' 1449 env = { 1450 'REQUEST_METHOD': 'GET', 1451 'SCRIPT_NAME': '', 1452 'PATH_INFO': path_info or '', 1453 'QUERY_STRING': query_string, 1454 'SERVER_NAME': netloc.split(':')[0], 1455 'SERVER_PORT': netloc.split(':')[1], 1456 'HTTP_HOST': netloc, 1457 'SERVER_PROTOCOL': 'HTTP/1.0', 1458 'wsgi.version': (1, 0), 1459 'wsgi.url_scheme': scheme, 1460 'wsgi.input': io.BytesIO(), 1461 'wsgi.errors': sys.stderr, 1462 'wsgi.multithread': False, 1463 'wsgi.multiprocess': False, 1464 'wsgi.run_once': False, 1465 #'webob.is_body_seekable': True, 1466 } 1467 return env 1468 1469 1470 def environ_add_POST(env, data, content_type=None): 1471 if data is None: 1472 return 1473 elif isinstance(data, text_type): # pragma: no cover 1474 data = data.encode('ascii') 1475 if env['REQUEST_METHOD'] not in ('POST', 'PUT'): 1476 env['REQUEST_METHOD'] = 'POST' 1477 has_files = False 1478 if hasattr(data, 'items'): 1479 data = list(data.items()) 1480 for k, v in data: 1481 if isinstance(v, (tuple, list)): 1482 has_files = True 1483 break 1484 if content_type is None: 1485 if has_files: 1486 content_type = 'multipart/form-data' 1487 else: 1488 content_type = 'application/x-www-form-urlencoded' 1489 if content_type.startswith('multipart/form-data'): 1490 if not isinstance(data, bytes): 1491 content_type, data = _encode_multipart(data, content_type) 1492 elif content_type.startswith('application/x-www-form-urlencoded'): 1493 if has_files: 1494 raise ValueError('Submiting files is not allowed for' 1495 ' content type `%s`' % content_type) 1496 if not isinstance(data, bytes): 1497 data = url_encode(data) 1498 else: 1499 if not isinstance(data, bytes): 1500 raise ValueError('Please provide `POST` data as string' 1501 ' for content type `%s`' % content_type) 1502 data = bytes_(data, 'utf8') 1503 env['wsgi.input'] = io.BytesIO(data) 1504 env['webob.is_body_seekable'] = True 1505 env['CONTENT_LENGTH'] = str(len(data)) 1506 env['CONTENT_TYPE'] = content_type 1507 1508 1509 1510 ######################### 1511 ## Helper classes and monkeypatching 1512 ######################### 1513 1514 class DisconnectionError(IOError): 1515 pass 1516 1517 1518 class LimitedLengthFile(io.RawIOBase): 1519 def __init__(self, file, maxlen): 1520 self.file = file 1521 self.maxlen = maxlen 1522 self.remaining = maxlen 1523 1524 def __repr__(self): 1525 return '<%s(%r, maxlen=%s)>' % ( 1526 self.__class__.__name__, 1527 self.file, 1528 self.maxlen 1529 ) 1530 1531 def fileno(self): 1532 return self.file.fileno() 1533 1534 @staticmethod 1535 def readable(): 1536 return True 1537 1538 def readinto(self, buff): 1539 if not self.remaining: 1540 return 0 1541 sz0 = min(len(buff), self.remaining) 1542 data = self.file.read(sz0) 1543 sz = len(data) 1544 self.remaining -= sz 1545 #if not data: 1546 if sz < sz0 and self.remaining: 1547 raise DisconnectionError( 1548 "The client disconnected while sending the POST/PUT body " 1549 + "(%d more bytes were expected)" % self.remaining 1550 ) 1551 buff[:sz] = data 1552 return sz 1553 1554 1555 def _cgi_FieldStorage__repr__patch(self): 1556 """ monkey patch for FieldStorage.__repr__ 1557 1558 Unbelievably, the default __repr__ on FieldStorage reads 1559 the entire file content instead of being sane about it. 1560 This is a simple replacement that doesn't do that 1561 """ 1562 if self.file: 1563 return "FieldStorage(%r, %r)" % (self.name, self.filename) 1564 return "FieldStorage(%r, %r, %r)" % (self.name, self.filename, self.value) 1565 1566 cgi_FieldStorage.__repr__ = _cgi_FieldStorage__repr__patch 1567 1568 class FakeCGIBody(io.RawIOBase): 1569 def __init__(self, vars, content_type): 1570 if content_type.startswith('multipart/form-data'): 1571 if not _get_multipart_boundary(content_type): 1572 raise ValueError('Content-type: %r does not contain boundary' 1573 % content_type) 1574 self.vars = vars 1575 self.content_type = content_type 1576 self.file = None 1577 1578 def __repr__(self): 1579 inner = repr(self.vars) 1580 if len(inner) > 20: 1581 inner = inner[:15] + '...' + inner[-5:] 1582 return '<%s at 0x%x viewing %s>' % ( 1583 self.__class__.__name__, 1584 abs(id(self)), inner) 1585 1586 def fileno(self): 1587 return None 1588 1589 @staticmethod 1590 def readable(): 1591 return True 1592 1593 def readinto(self, buff): 1594 if self.file is None: 1595 if self.content_type.startswith( 1596 'application/x-www-form-urlencoded'): 1597 data = '&'.join( 1598 '%s=%s' % (quote_plus(bytes_(k, 'utf8')), quote_plus(bytes_(v, 'utf8'))) 1599 for k,v in self.vars.items() 1600 ) 1601 self.file = io.BytesIO(bytes_(data)) 1602 elif self.content_type.startswith('multipart/form-data'): 1603 self.file = _encode_multipart( 1604 self.vars.items(), 1605 self.content_type, 1606 fout=io.BytesIO() 1607 )[1] 1608 self.file.seek(0) 1609 else: 1610 assert 0, ('Bad content type: %r' % self.content_type) 1611 return self.file.readinto(buff) 1612 1613 1614 def _get_multipart_boundary(ctype): 1615 m = re.search(r'boundary=([^ ]+)', ctype, re.I) 1616 if m: 1617 return native_(m.group(1).strip('"')) 1618 1619 1620 def _encode_multipart(vars, content_type, fout=None): 1621 """Encode a multipart request body into a string""" 1622 f = fout or io.BytesIO() 1623 w = f.write 1624 wt = lambda t: f.write(t.encode('utf8')) 1625 CRLF = b'\r\n' 1626 boundary = _get_multipart_boundary(content_type) 1627 if not boundary: 1628 boundary = native_(binascii.hexlify(os.urandom(10))) 1629 content_type += ('; boundary=%s' % boundary) 1630 for name, value in vars: 1631 w(b'--') 1632 wt(boundary) 1633 w(CRLF) 1634 assert name is not None, 'Value associated with no name: %r' % value 1635 wt('Content-Disposition: form-data; name="%s"' % name) 1636 filename = None 1637 if getattr(value, 'filename', None): 1638 filename = value.filename 1639 elif isinstance(value, (list, tuple)): 1640 filename, value = value 1641 if hasattr(value, 'read'): 1642 value = value.read() 1643 1644 if filename is not None: 1645 wt('; filename="%s"' % filename) 1646 mime_type = mimetypes.guess_type(filename)[0] 1647 else: 1648 mime_type = None 1649 1650 w(CRLF) 1651 1652 # TODO: should handle value.disposition_options 1653 if getattr(value, 'type', None): 1654 wt('Content-type: %s' % value.type) 1655 if value.type_options: 1656 for ct_name, ct_value in sorted(value.type_options.items()): 1657 wt('; %s="%s"' % (ct_name, ct_value)) 1658 w(CRLF) 1659 elif mime_type: 1660 wt('Content-type: %s' % mime_type) 1661 w(CRLF) 1662 w(CRLF) 1663 if hasattr(value, 'value'): 1664 value = value.value 1665 if isinstance(value, bytes): 1666 w(value) 1667 else: 1668 wt(value) 1669 w(CRLF) 1670 wt('--%s--' % boundary) 1671 if fout: 1672 return content_type, fout 1673 else: 1674 return content_type, f.getvalue() 1675 1676 def detect_charset(ctype): 1677 m = CHARSET_RE.search(ctype) 1678 if m: 1679 return m.group(1).strip('"').strip() 1680 1681 def _is_utf8(charset): 1682 if not charset: 1683 return True 1684 else: 1685 return charset.lower().replace('-', '') == 'utf8' 1686 1687 1688 class Transcoder(object): 1689 def __init__(self, charset, errors='strict'): 1690 self.charset = charset # source charset 1691 self.errors = errors # unicode errors 1692 self._trans = lambda b: b.decode(charset, errors).encode('utf8') 1693 1694 def transcode_query(self, q): 1695 if PY3: # pragma: no cover 1696 q_orig = q 1697 if '=' not in q: 1698 # this doesn't look like a form submission 1699 return q_orig 1700 q = list(parse_qsl_text(q, self.charset)) 1701 return url_encode(q) 1702 else: 1703 q_orig = q 1704 if '=' not in q: 1705 # this doesn't look like a form submission 1706 return q_orig 1707 q = urlparse.parse_qsl(q, self.charset) 1708 t = self._trans 1709 q = [(t(k), t(v)) for k,v in q] 1710 return url_encode(q) 1711 1712 def transcode_fs(self, fs, content_type): 1713 # transcode FieldStorage 1714 if PY3: # pragma: no cover 1715 decode = lambda b: b 1716 else: 1717 decode = lambda b: b.decode(self.charset, self.errors) 1718 data = [] 1719 for field in fs.list or (): 1720 field.name = decode(field.name) 1721 if field.filename: 1722 field.filename = decode(field.filename) 1723 data.append((field.name, field)) 1724 else: 1725 data.append((field.name, decode(field.value))) 1726 1727 # TODO: transcode big requests to temp file 1728 content_type, fout = _encode_multipart( 1729 data, 1730 content_type, 1731 fout=io.BytesIO() 1732 ) 1733 return fout 1734 1735 # TODO: remove in 1.4 1736 for _name in 'GET POST params cookies'.split(): 1737 _str_name = 'str_'+_name 1738 _prop = deprecated_property( 1739 None, _str_name, 1740 "disabled starting WebOb 1.2, use %s instead" % _name, '1.2') 1741 setattr(BaseRequest, _str_name, _prop) 1742