Home | History | Annotate | Download | only in Lib
      1 #! /usr/local/bin/python
      2 
      3 # NOTE: the above "/usr/local/bin/python" is NOT a mistake.  It is
      4 # intentionally NOT "/usr/bin/env python".  On many systems
      5 # (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI
      6 # scripts, and /usr/local/bin is the default directory where Python is
      7 # installed, so /usr/bin/env would be unable to find python.  Granted,
      8 # binary installations by Linux vendors often install Python in
      9 # /usr/bin.  So let those vendors patch cgi.py to match their choice
     10 # of installation.
     11 
     12 """Support module for CGI (Common Gateway Interface) scripts.
     13 
     14 This module defines a number of utilities for use by CGI scripts
     15 written in Python.
     16 """
     17 
     18 # History
     19 # -------
     20 #
     21 # Michael McLay started this module.  Steve Majewski changed the
     22 # interface to SvFormContentDict and FormContentDict.  The multipart
     23 # parsing was inspired by code submitted by Andreas Paepcke.  Guido van
     24 # Rossum rewrote, reformatted and documented the module and is currently
     25 # responsible for its maintenance.
     26 #
     27 
     28 __version__ = "2.6"
     29 
     30 
     31 # Imports
     32 # =======
     33 
     34 from io import StringIO, BytesIO, TextIOWrapper
     35 from collections import Mapping
     36 import sys
     37 import os
     38 import urllib.parse
     39 from email.parser import FeedParser
     40 from email.message import Message
     41 from warnings import warn
     42 import html
     43 import locale
     44 import tempfile
     45 
     46 __all__ = ["MiniFieldStorage", "FieldStorage",
     47            "parse", "parse_qs", "parse_qsl", "parse_multipart",
     48            "parse_header", "test", "print_exception", "print_environ",
     49            "print_form", "print_directory", "print_arguments",
     50            "print_environ_usage", "escape"]
     51 
     52 # Logging support
     53 # ===============
     54 
     55 logfile = ""            # Filename to log to, if not empty
     56 logfp = None            # File object to log to, if not None
     57 
     58 def initlog(*allargs):
     59     """Write a log message, if there is a log file.
     60 
     61     Even though this function is called initlog(), you should always
     62     use log(); log is a variable that is set either to initlog
     63     (initially), to dolog (once the log file has been opened), or to
     64     nolog (when logging is disabled).
     65 
     66     The first argument is a format string; the remaining arguments (if
     67     any) are arguments to the % operator, so e.g.
     68         log("%s: %s", "a", "b")
     69     will write "a: b" to the log file, followed by a newline.
     70 
     71     If the global logfp is not None, it should be a file object to
     72     which log data is written.
     73 
     74     If the global logfp is None, the global logfile may be a string
     75     giving a filename to open, in append mode.  This file should be
     76     world writable!!!  If the file can't be opened, logging is
     77     silently disabled (since there is no safe place where we could
     78     send an error message).
     79 
     80     """
     81     global log, logfile, logfp
     82     if logfile and not logfp:
     83         try:
     84             logfp = open(logfile, "a")
     85         except OSError:
     86             pass
     87     if not logfp:
     88         log = nolog
     89     else:
     90         log = dolog
     91     log(*allargs)
     92 
     93 def dolog(fmt, *args):
     94     """Write a log message to the log file.  See initlog() for docs."""
     95     logfp.write(fmt%args + "\n")
     96 
     97 def nolog(*allargs):
     98     """Dummy function, assigned to log when logging is disabled."""
     99     pass
    100 
    101 def closelog():
    102     """Close the log file."""
    103     global log, logfile, logfp
    104     logfile = ''
    105     if logfp:
    106         logfp.close()
    107         logfp = None
    108     log = initlog
    109 
    110 log = initlog           # The current logging function
    111 
    112 
    113 # Parsing functions
    114 # =================
    115 
    116 # Maximum input we will accept when REQUEST_METHOD is POST
    117 # 0 ==> unlimited input
    118 maxlen = 0
    119 
    120 def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
    121     """Parse a query in the environment or from a file (default stdin)
    122 
    123         Arguments, all optional:
    124 
    125         fp              : file pointer; default: sys.stdin.buffer
    126 
    127         environ         : environment dictionary; default: os.environ
    128 
    129         keep_blank_values: flag indicating whether blank values in
    130             percent-encoded forms should be treated as blank strings.
    131             A true value indicates that blanks should be retained as
    132             blank strings.  The default false value indicates that
    133             blank values are to be ignored and treated as if they were
    134             not included.
    135 
    136         strict_parsing: flag indicating what to do with parsing errors.
    137             If false (the default), errors are silently ignored.
    138             If true, errors raise a ValueError exception.
    139     """
    140     if fp is None:
    141         fp = sys.stdin
    142 
    143     # field keys and values (except for files) are returned as strings
    144     # an encoding is required to decode the bytes read from self.fp
    145     if hasattr(fp,'encoding'):
    146         encoding = fp.encoding
    147     else:
    148         encoding = 'latin-1'
    149 
    150     # fp.read() must return bytes
    151     if isinstance(fp, TextIOWrapper):
    152         fp = fp.buffer
    153 
    154     if not 'REQUEST_METHOD' in environ:
    155         environ['REQUEST_METHOD'] = 'GET'       # For testing stand-alone
    156     if environ['REQUEST_METHOD'] == 'POST':
    157         ctype, pdict = parse_header(environ['CONTENT_TYPE'])
    158         if ctype == 'multipart/form-data':
    159             return parse_multipart(fp, pdict)
    160         elif ctype == 'application/x-www-form-urlencoded':
    161             clength = int(environ['CONTENT_LENGTH'])
    162             if maxlen and clength > maxlen:
    163                 raise ValueError('Maximum content length exceeded')
    164             qs = fp.read(clength).decode(encoding)
    165         else:
    166             qs = ''                     # Unknown content-type
    167         if 'QUERY_STRING' in environ:
    168             if qs: qs = qs + '&'
    169             qs = qs + environ['QUERY_STRING']
    170         elif sys.argv[1:]:
    171             if qs: qs = qs + '&'
    172             qs = qs + sys.argv[1]
    173         environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
    174     elif 'QUERY_STRING' in environ:
    175         qs = environ['QUERY_STRING']
    176     else:
    177         if sys.argv[1:]:
    178             qs = sys.argv[1]
    179         else:
    180             qs = ""
    181         environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
    182     return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
    183                                  encoding=encoding)
    184 
    185 
    186 # parse query string function called from urlparse,
    187 # this is done in order to maintain backward compatibility.
    188 
    189 def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
    190     """Parse a query given as a string argument."""
    191     warn("cgi.parse_qs is deprecated, use urllib.parse.parse_qs instead",
    192          DeprecationWarning, 2)
    193     return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing)
    194 
    195 def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
    196     """Parse a query given as a string argument."""
    197     warn("cgi.parse_qsl is deprecated, use urllib.parse.parse_qsl instead",
    198          DeprecationWarning, 2)
    199     return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing)
    200 
    201 def parse_multipart(fp, pdict):
    202     """Parse multipart input.
    203 
    204     Arguments:
    205     fp   : input file
    206     pdict: dictionary containing other parameters of content-type header
    207 
    208     Returns a dictionary just like parse_qs(): keys are the field names, each
    209     value is a list of values for that field.  This is easy to use but not
    210     much good if you are expecting megabytes to be uploaded -- in that case,
    211     use the FieldStorage class instead which is much more flexible.  Note
    212     that content-type is the raw, unparsed contents of the content-type
    213     header.
    214 
    215     XXX This does not parse nested multipart parts -- use FieldStorage for
    216     that.
    217 
    218     XXX This should really be subsumed by FieldStorage altogether -- no
    219     point in having two implementations of the same parsing algorithm.
    220     Also, FieldStorage protects itself better against certain DoS attacks
    221     by limiting the size of the data read in one chunk.  The API here
    222     does not support that kind of protection.  This also affects parse()
    223     since it can call parse_multipart().
    224 
    225     """
    226     import http.client
    227 
    228     boundary = b""
    229     if 'boundary' in pdict:
    230         boundary = pdict['boundary']
    231     if not valid_boundary(boundary):
    232         raise ValueError('Invalid boundary in multipart form: %r'
    233                             % (boundary,))
    234 
    235     nextpart = b"--" + boundary
    236     lastpart = b"--" + boundary + b"--"
    237     partdict = {}
    238     terminator = b""
    239 
    240     while terminator != lastpart:
    241         bytes = -1
    242         data = None
    243         if terminator:
    244             # At start of next part.  Read headers first.
    245             headers = http.client.parse_headers(fp)
    246             clength = headers.get('content-length')
    247             if clength:
    248                 try:
    249                     bytes = int(clength)
    250                 except ValueError:
    251                     pass
    252             if bytes > 0:
    253                 if maxlen and bytes > maxlen:
    254                     raise ValueError('Maximum content length exceeded')
    255                 data = fp.read(bytes)
    256             else:
    257                 data = b""
    258         # Read lines until end of part.
    259         lines = []
    260         while 1:
    261             line = fp.readline()
    262             if not line:
    263                 terminator = lastpart # End outer loop
    264                 break
    265             if line.startswith(b"--"):
    266                 terminator = line.rstrip()
    267                 if terminator in (nextpart, lastpart):
    268                     break
    269             lines.append(line)
    270         # Done with part.
    271         if data is None:
    272             continue
    273         if bytes < 0:
    274             if lines:
    275                 # Strip final line terminator
    276                 line = lines[-1]
    277                 if line[-2:] == b"\r\n":
    278                     line = line[:-2]
    279                 elif line[-1:] == b"\n":
    280                     line = line[:-1]
    281                 lines[-1] = line
    282                 data = b"".join(lines)
    283         line = headers['content-disposition']
    284         if not line:
    285             continue
    286         key, params = parse_header(line)
    287         if key != 'form-data':
    288             continue
    289         if 'name' in params:
    290             name = params['name']
    291         else:
    292             continue
    293         if name in partdict:
    294             partdict[name].append(data)
    295         else:
    296             partdict[name] = [data]
    297 
    298     return partdict
    299 
    300 
    301 def _parseparam(s):
    302     while s[:1] == ';':
    303         s = s[1:]
    304         end = s.find(';')
    305         while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
    306             end = s.find(';', end + 1)
    307         if end < 0:
    308             end = len(s)
    309         f = s[:end]
    310         yield f.strip()
    311         s = s[end:]
    312 
    313 def parse_header(line):
    314     """Parse a Content-type like header.
    315 
    316     Return the main content-type and a dictionary of options.
    317 
    318     """
    319     parts = _parseparam(';' + line)
    320     key = parts.__next__()
    321     pdict = {}
    322     for p in parts:
    323         i = p.find('=')
    324         if i >= 0:
    325             name = p[:i].strip().lower()
    326             value = p[i+1:].strip()
    327             if len(value) >= 2 and value[0] == value[-1] == '"':
    328                 value = value[1:-1]
    329                 value = value.replace('\\\\', '\\').replace('\\"', '"')
    330             pdict[name] = value
    331     return key, pdict
    332 
    333 
    334 # Classes for field storage
    335 # =========================
    336 
    337 class MiniFieldStorage:
    338 
    339     """Like FieldStorage, for use when no file uploads are possible."""
    340 
    341     # Dummy attributes
    342     filename = None
    343     list = None
    344     type = None
    345     file = None
    346     type_options = {}
    347     disposition = None
    348     disposition_options = {}
    349     headers = {}
    350 
    351     def __init__(self, name, value):
    352         """Constructor from field name and value."""
    353         self.name = name
    354         self.value = value
    355         # self.file = StringIO(value)
    356 
    357     def __repr__(self):
    358         """Return printable representation."""
    359         return "MiniFieldStorage(%r, %r)" % (self.name, self.value)
    360 
    361 
    362 class FieldStorage:
    363 
    364     """Store a sequence of fields, reading multipart/form-data.
    365 
    366     This class provides naming, typing, files stored on disk, and
    367     more.  At the top level, it is accessible like a dictionary, whose
    368     keys are the field names.  (Note: None can occur as a field name.)
    369     The items are either a Python list (if there's multiple values) or
    370     another FieldStorage or MiniFieldStorage object.  If it's a single
    371     object, it has the following attributes:
    372 
    373     name: the field name, if specified; otherwise None
    374 
    375     filename: the filename, if specified; otherwise None; this is the
    376         client side filename, *not* the file name on which it is
    377         stored (that's a temporary file you don't deal with)
    378 
    379     value: the value as a *string*; for file uploads, this
    380         transparently reads the file every time you request the value
    381         and returns *bytes*
    382 
    file: the file(-like) object from which you can read the data *as
        bytes*; None if the data is stored as a simple string
    385 
    386     type: the content-type, or None if not specified
    387 
    388     type_options: dictionary of options specified on the content-type
    389         line
    390 
    391     disposition: content-disposition, or None if not specified
    392 
    393     disposition_options: dictionary of corresponding options
    394 
    395     headers: a dictionary(-like) object (sometimes email.message.Message or a
    396         subclass thereof) containing *all* headers
    397 
    398     The class is subclassable, mostly for the purpose of overriding
    399     the make_file() method, which is called internally to come up with
    400     a file open for reading and writing.  This makes it possible to
    401     override the default choice of storing all files in a temporary
    402     directory and unlinking them as soon as they have been opened.
    403 
    404     """
    def __init__(self, fp=None, headers=None, outerboundary=b'',
                 environ=os.environ, keep_blank_values=0, strict_parsing=0,
                 limit=None, encoding='utf-8', errors='replace'):
        """Constructor.  Read multipart/* until last part.

        Arguments, all optional:

        fp              : file pointer; default: sys.stdin.buffer
            (not used when the request method is GET)
            Can be :
            1. a TextIOWrapper object
            2. an object whose read() and readline() methods return bytes

        headers         : header dictionary-like object; default:
            taken from environ as per CGI spec

        outerboundary   : terminating multipart boundary
            (for internal use only)

        environ         : environment dictionary; default: os.environ

        keep_blank_values: flag indicating whether blank values in
            percent-encoded forms should be treated as blank strings.
            A true value indicates that blanks should be retained as
            blank strings.  The default false value indicates that
            blank values are to be ignored and treated as if they were
            not included.

        strict_parsing: flag indicating what to do with parsing errors.
            If false (the default), errors are silently ignored.
            If true, errors raise a ValueError exception.

        limit : used internally to read parts of multipart/form-data forms,
            to exit from the reading loop when reached. It is the difference
            between the form content-length and the number of bytes already
            read

        encoding, errors : the encoding and error handler used to decode the
            binary stream to strings. Must be the same as the charset defined
            for the page sending the form (content-type : meta http-equiv or
            header)

        Raises TypeError if headers is neither a mapping nor an
        email.message.Message, if fp lacks read() or readline(), or if
        outerboundary is not bytes.  Raises ValueError if the
        content-length header exceeds the module-level maxlen (when
        maxlen is non-zero).

        """
        method = 'GET'
        self.keep_blank_values = keep_blank_values
        self.strict_parsing = strict_parsing
        if 'REQUEST_METHOD' in environ:
            method = environ['REQUEST_METHOD'].upper()
        self.qs_on_post = None
        if method == 'GET' or method == 'HEAD':
            # For GET/HEAD the data comes from the query string (or the
            # first command line argument); it is wrapped in a bytes
            # stream that replaces fp, so it is read like a request body.
            if 'QUERY_STRING' in environ:
                qs = environ['QUERY_STRING']
            elif sys.argv[1:]:
                qs = sys.argv[1]
            else:
                qs = ""
            qs = qs.encode(locale.getpreferredencoding(), 'surrogateescape')
            fp = BytesIO(qs)
            if headers is None:
                headers = {'content-type':
                           "application/x-www-form-urlencoded"}
        if headers is None:
            # Build the headers from the CGI environment.
            headers = {}
            if method == 'POST':
                # Set default content-type for POST to what's traditional
                headers['content-type'] = "application/x-www-form-urlencoded"
            if 'CONTENT_TYPE' in environ:
                headers['content-type'] = environ['CONTENT_TYPE']
            if 'QUERY_STRING' in environ:
                # Remembered so the read_* methods can add these fields.
                self.qs_on_post = environ['QUERY_STRING']
            if 'CONTENT_LENGTH' in environ:
                headers['content-length'] = environ['CONTENT_LENGTH']
        else:
            if not (isinstance(headers, (Mapping, Message))):
                raise TypeError("headers must be mapping or an instance of "
                                "email.message.Message")
        self.headers = headers
        if fp is None:
            self.fp = sys.stdin.buffer
        # self.fp.read() must return bytes
        elif isinstance(fp, TextIOWrapper):
            self.fp = fp.buffer
        else:
            if not (hasattr(fp, 'read') and hasattr(fp, 'readline')):
                raise TypeError("fp must be file pointer")
            self.fp = fp

        self.encoding = encoding
        self.errors = errors

        if not isinstance(outerboundary, bytes):
            raise TypeError('outerboundary must be bytes, not %s'
                            % type(outerboundary).__name__)
        self.outerboundary = outerboundary

        self.bytes_read = 0
        self.limit = limit

        # Process content-disposition header
        cdisp, pdict = "", {}
        if 'content-disposition' in self.headers:
            cdisp, pdict = parse_header(self.headers['content-disposition'])
        self.disposition = cdisp
        self.disposition_options = pdict
        self.name = None
        if 'name' in pdict:
            self.name = pdict['name']
        self.filename = None
        if 'filename' in pdict:
            self.filename = pdict['filename']
        # A part that carries a filename is handled as binary data.
        self._binary_file = self.filename is not None

        # Process content-type header
        #
        # Honor any existing content-type header.  But if there is no
        # content-type header, use some sensible defaults.  Assume
        # outerboundary is "" at the outer level, but something non-false
        # inside a multi-part.  The default for an inner part is text/plain,
        # but for an outer part it should be urlencoded.  This should catch
        # bogus clients which erroneously forget to include a content-type
        # header.
        #
        # See below for what we do if there does exist a content-type header,
        # but it happens to be something we don't understand.
        if 'content-type' in self.headers:
            ctype, pdict = parse_header(self.headers['content-type'])
        elif self.outerboundary or method != 'POST':
            ctype, pdict = "text/plain", {}
        else:
            ctype, pdict = 'application/x-www-form-urlencoded', {}
        self.type = ctype
        self.type_options = pdict
        if 'boundary' in pdict:
            self.innerboundary = pdict['boundary'].encode(self.encoding)
        else:
            self.innerboundary = b""

        # clen stays -1 when the content-length header is absent or is
        # not an integer.
        clen = -1
        if 'content-length' in self.headers:
            try:
                clen = int(self.headers['content-length'])
            except ValueError:
                pass
            if maxlen and clen > maxlen:
                raise ValueError('Maximum content length exceeded')
        self.length = clen
        # NOTE(review): this tests the truth value of clen, so an unknown
        # length (-1) sets limit to -1, while a content-length of 0 leaves
        # limit as None -- confirm that this is what the readers expect.
        if self.limit is None and clen:
            self.limit = clen

        self.list = self.file = None
        self.done = 0
        # Dispatch on the content type to the matching reader.
        if ctype == 'application/x-www-form-urlencoded':
            self.read_urlencoded()
        elif ctype[:10] == 'multipart/':
            self.read_multi(environ, keep_blank_values, strict_parsing)
        else:
            self.read_single()
    562 
    563     def __del__(self):
    564         try:
    565             self.file.close()
    566         except AttributeError:
    567             pass
    568 
    569     def __enter__(self):
    570         return self
    571 
    572     def __exit__(self, *args):
    573         self.file.close()
    574 
    575     def __repr__(self):
    576         """Return a printable representation."""
    577         return "FieldStorage(%r, %r, %r)" % (
    578                 self.name, self.filename, self.value)
    579 
    580     def __iter__(self):
    581         return iter(self.keys())
    582 
    583     def __getattr__(self, name):
    584         if name != 'value':
    585             raise AttributeError(name)
    586         if self.file:
    587             self.file.seek(0)
    588             value = self.file.read()
    589             self.file.seek(0)
    590         elif self.list is not None:
    591             value = self.list
    592         else:
    593             value = None
    594         return value
    595 
    596     def __getitem__(self, key):
    597         """Dictionary style indexing."""
    598         if self.list is None:
    599             raise TypeError("not indexable")
    600         found = []
    601         for item in self.list:
    602             if item.name == key: found.append(item)
    603         if not found:
    604             raise KeyError(key)
    605         if len(found) == 1:
    606             return found[0]
    607         else:
    608             return found
    609 
    610     def getvalue(self, key, default=None):
    611         """Dictionary style get() method, including 'value' lookup."""
    612         if key in self:
    613             value = self[key]
    614             if isinstance(value, list):
    615                 return [x.value for x in value]
    616             else:
    617                 return value.value
    618         else:
    619             return default
    620 
    621     def getfirst(self, key, default=None):
    622         """ Return the first value received."""
    623         if key in self:
    624             value = self[key]
    625             if isinstance(value, list):
    626                 return value[0].value
    627             else:
    628                 return value.value
    629         else:
    630             return default
    631 
    632     def getlist(self, key):
    633         """ Return list of received values."""
    634         if key in self:
    635             value = self[key]
    636             if isinstance(value, list):
    637                 return [x.value for x in value]
    638             else:
    639                 return [value.value]
    640         else:
    641             return []
    642 
    643     def keys(self):
    644         """Dictionary style keys() method."""
    645         if self.list is None:
    646             raise TypeError("not indexable")
    647         return list(set(item.name for item in self.list))
    648 
    649     def __contains__(self, key):
    650         """Dictionary style __contains__ method."""
    651         if self.list is None:
    652             raise TypeError("not indexable")
    653         return any(item.name == key for item in self.list)
    654 
    655     def __len__(self):
    656         """Dictionary style len(x) support."""
    657         return len(self.keys())
    658 
    659     def __bool__(self):
    660         if self.list is None:
    661             raise TypeError("Cannot be converted to bool.")
    662         return bool(self.list)
    663 
    def read_urlencoded(self):
        """Internal: read data in query string format.

        Reads self.length bytes from self.fp, decodes them, appends the
        query string remembered in self.qs_on_post (if any) and stores one
        MiniFieldStorage per parsed field in self.list.
        """
        raw = self.fp.read(self.length)
        if not isinstance(raw, bytes):
            raise ValueError("%s should return bytes, got %s" \
                             % (self.fp, type(raw).__name__))
        qs = raw.decode(self.encoding, self.errors)
        if self.qs_on_post:
            qs += '&' + self.qs_on_post
        self.list = []
        pairs = urllib.parse.parse_qsl(
            qs, self.keep_blank_values, self.strict_parsing,
            encoding=self.encoding, errors=self.errors)
        self.list.extend(MiniFieldStorage(key, value) for key, value in pairs)
        self.skip_lines()
    680 
    # Class used by read_multi() to build the parts of a multipart body;
    # when left as None, the class of the storage itself is used.
    FieldStorageClass = None
    682 
    def read_multi(self, environ, keep_blank_values, strict_parsing):
        """Internal: read a part that is itself multipart.

        Skips input up to the first inner boundary line, then repeatedly
        reads a block of part headers and builds one nested storage per
        part (of class FieldStorageClass, or of this object's own class),
        appending each to self.list.  Fields from the query string
        remembered in self.qs_on_post are added first.

        Raises ValueError if the inner boundary is invalid or if
        self.fp does not return bytes.
        """
        ib = self.innerboundary
        if not valid_boundary(ib):
            raise ValueError('Invalid boundary in multipart form: %r' % (ib,))
        self.list = []
        if self.qs_on_post:
            query = urllib.parse.parse_qsl(
                self.qs_on_post, self.keep_blank_values, self.strict_parsing,
                encoding=self.encoding, errors=self.errors)
            for key, value in query:
                self.list.append(MiniFieldStorage(key, value))

        klass = self.FieldStorageClass or self.__class__
        first_line = self.fp.readline() # bytes
        if not isinstance(first_line, bytes):
            raise ValueError("%s should return bytes, got %s" \
                             % (self.fp, type(first_line).__name__))
        self.bytes_read += len(first_line)

        # Ensure that we consume the file until we've hit our inner boundary
        delimiter = b"--" + self.innerboundary
        while first_line.strip() != delimiter and first_line:
            first_line = self.fp.readline()
            self.bytes_read += len(first_line)

        while True:
            parser = FeedParser()
            # Collect the header lines of the next part, up to and
            # including the blank line (or EOF) that ends them.
            hdr_text = b""
            while True:
                data = self.fp.readline()
                hdr_text += data
                if not data.strip():
                    break
            if not hdr_text:
                break
            # parser takes strings, not bytes
            self.bytes_read += len(hdr_text)
            parser.feed(hdr_text.decode(self.encoding, self.errors))
            headers = parser.close()

            # Some clients add Content-Length for part headers, ignore them
            if 'content-length' in headers:
                del headers['content-length']

            # self.limit can still be None here (__init__ does not assign
            # it for a content-length of 0); forward None in that case
            # instead of failing with a TypeError on the subtraction.
            if self.limit is None:
                limit = None
            else:
                limit = self.limit - self.bytes_read
            part = klass(self.fp, headers, ib, environ, keep_blank_values,
                         strict_parsing, limit,
                         self.encoding, self.errors)
            self.bytes_read += part.bytes_read
            self.list.append(part)
            if part.done or self.bytes_read >= self.length > 0:
                break
        self.skip_lines()
    736 
    def read_single(self):
        """Internal: read an atomic part.

        With a known length the data is read as binary and the rest is
        skipped; otherwise it is read line by line.  The resulting file is
        rewound to its start.
        """
        if self.length < 0:
            self.read_lines()
        else:
            self.read_binary()
            self.skip_lines()
        self.file.seek(0)
    745 
    # Upper bound on the number of bytes read_binary() requests from the
    # input stream in a single read() call.
    bufsize = 8*1024            # I/O buffering size for copy to file
    747 
    def read_binary(self):
        """Internal: copy ``self.length`` bytes of binary data into a file.

        A fresh file is obtained from make_file() and stored in
        ``self.file``.  Data is pulled from ``self.fp`` in chunks of at
        most ``self.bufsize`` bytes and ``self.bytes_read`` is advanced by
        each chunk's size.  If the input ends before ``self.length`` bytes
        were delivered, ``self.done`` is set to -1.

        Raises ValueError when ``self.fp.read()`` returns something other
        than bytes.
        """
        self.file = self.make_file()
        remaining = self.length
        # A zero or negative length leaves the new file empty.
        while remaining > 0:
            chunk = self.fp.read(min(remaining, self.bufsize))  # bytes
            if not isinstance(chunk, bytes):
                raise ValueError("%s should return bytes, got %s"
                                 % (self.fp, type(chunk).__name__))
            received = len(chunk)
            self.bytes_read += received
            if received == 0:
                # Premature end of input.
                self.done = -1
                break
            self.file.write(chunk)
            remaining -= received
    764 
    def read_lines(self):
        """Internal: read lines until EOF or outerboundary.

        Starts with an in-memory buffer (bytes for file fields, text for
        everything else) that is both ``self.file`` and the private buffer
        consulted by the write helper, then delegates to
        read_lines_to_outerboundary() when an outer boundary is set and to
        read_lines_to_eof() otherwise.
        """
        buffer_type = BytesIO if self._binary_file else StringIO
        self.file = self.__file = buffer_type()
        reader = (self.read_lines_to_outerboundary if self.outerboundary
                  else self.read_lines_to_eof)
        reader()
    775 
    def __write(self, line):
        """Internal: append *line* (always bytes, never str) to self.file.

        While the private in-memory buffer is still in use and this write
        would take it past 1000, the buffered content is moved into a file
        obtained from make_file() and the in-memory buffer is dropped.
        For binary fields the bytes are stored unchanged; for other fields
        they are decoded with ``self.encoding`` / ``self.errors`` first.
        """
        spool = self.__file
        if spool is not None and spool.tell() + len(line) > 1000:
            # Spill everything collected so far into a real file.
            self.file = self.make_file()
            self.file.write(spool.getvalue())
            self.__file = None
        if self._binary_file:
            payload = line  # keep bytes
        else:
            payload = line.decode(self.encoding, self.errors)  # to string
        self.file.write(payload)
    790 
    def read_lines_to_eof(self):
        """Internal: read lines until EOF.

        Each line (at most 1<<16 bytes per readline() call) is counted in
        ``self.bytes_read`` and passed to the private write helper.  When
        the input is exhausted ``self.done`` is set to -1.
        """
        while True:
            chunk = self.fp.readline(1 << 16)  # bytes
            self.bytes_read += len(chunk)
            if chunk:
                self.__write(chunk)
                continue
            # Empty read: end of input.
            self.done = -1
            return
    800 
    def read_lines_to_outerboundary(self):
        """Internal: read lines until outerboundary.

        Data is read as bytes: boundaries and line ends must be converted
        to bytes for comparisons.

        Each line is handed to the private write helper with its line
        terminator held back: the terminator is only written in front of
        the following line, so the line break immediately preceding the
        boundary does not become part of the stored value.

        Reading stops at the next-part boundary, at the closing boundary
        (``self.done`` is set to 1), at end of input (``self.done`` is set
        to -1), or once at least ``self.limit`` bytes have been consumed by
        this call.  A ``self.limit`` of None means "no limit".
        """
        next_boundary = b"--" + self.outerboundary
        last_boundary = next_boundary + b"--"
        limit = self.limit
        delim = b""
        last_line_lfend = True
        _read = 0
        while True:
            # Comparing an int with None raises TypeError, so only apply
            # the cap when a limit was actually supplied.
            if limit is not None and _read >= limit:
                break
            line = self.fp.readline(1<<16) # bytes
            self.bytes_read += len(line)
            _read += len(line)
            if not line:
                self.done = -1
                break
            if delim == b"\r":
                # Re-attach a "\r" held back from the previous chunk so a
                # "\r\n" split across two reads is recognised as one.
                line = delim + line
                delim = b""
            # A boundary only counts at the start of a line, i.e. when the
            # previous chunk ended with a line feed.
            if line.startswith(b"--") and last_line_lfend:
                strippedline = line.rstrip()
                if strippedline == next_boundary:
                    break
                if strippedline == last_boundary:
                    self.done = 1
                    break
            odelim = delim
            if line.endswith(b"\r\n"):
                delim = b"\r\n"
                line = line[:-2]
                last_line_lfend = True
            elif line.endswith(b"\n"):
                delim = b"\n"
                line = line[:-1]
                last_line_lfend = True
            elif line.endswith(b"\r"):
                # We may interrupt \r\n sequences if they span the 2**16
                # byte boundary
                delim = b"\r"
                line = line[:-1]
                last_line_lfend = False
            else:
                delim = b""
                last_line_lfend = False
            # Write the previous line's terminator followed by this line's
            # content; this line's own terminator is deferred.
            self.__write(odelim + line)
    849 
    def skip_lines(self):
        """Internal: skip lines until outer boundary if defined.

        Does nothing when there is no outer boundary or when ``self.done``
        is already set.  Otherwise input lines are consumed (and counted
        in ``self.bytes_read``) until one of:

        - the next-part boundary ``--<outerboundary>`` is read;
        - the closing boundary ``--<outerboundary>--`` is read, in which
          case ``self.done`` is set to 1;
        - the input is exhausted, in which case ``self.done`` is set to -1.
        """
        if not self.outerboundary or self.done:
            return
        next_boundary = b"--" + self.outerboundary
        last_boundary = next_boundary + b"--"
        last_line_lfend = True
        while True:
            line = self.fp.readline(1<<16)
            self.bytes_read += len(line)
            if not line:
                self.done = -1
                break
            # A boundary line *starts* with "--" and is followed by a line
            # terminator, so the end of the line cannot be used to detect
            # it.  It also only counts at the start of a line, i.e. when
            # the previous chunk ended with a line feed.
            if line.startswith(b"--") and last_line_lfend:
                strippedline = line.rstrip()
                if strippedline == next_boundary:
                    break
                if strippedline == last_boundary:
                    self.done = 1
                    break
            last_line_lfend = line.endswith(b'\n')
    871 
    def make_file(self):
        """Overridable: return a readable & writable file.

        The returned file is used in three steps:
        - data is written to it
        - seek(0)
        - data is read back from it

        For file fields the file is opened in binary mode; for all other
        fields it is opened in text mode using ``self.encoding`` and
        without newline translation.

        This implementation returns an anonymous temporary file: it is
        created for reading and writing and immediately deleted
        (unlinked).  The trick (on Unix!) is that the file stays usable
        while no other process can open it, and it disappears
        automatically once it is closed or the current process
        terminates.

        Override this method in a derived class if you want a more
        permanent file.  If you want a visible temporary file that is
        nevertheless removed when the script terminates, consider a
        derived class with a __del__ method that unlinks the temporary
        files it created.

        """
        if not self._binary_file:
            return tempfile.TemporaryFile("w+",
                encoding=self.encoding, newline = '\n')
        return tempfile.TemporaryFile("wb+")
    901 
    902 
    903 # Test/debug code
    904 # ===============
    905 
def test(environ=os.environ):
    """Robust test CGI script, usable as main program.

    Write minimal HTTP headers and dump all information provided to
    the script in HTML form.

    The dump is produced twice: once as-is, and a second time after the
    module-level ``maxlen`` has been set to 50.  Any exception raised
    during either pass is reported with print_exception() instead of
    propagating to the caller.

    *environ* is the mapping handed to print_environ(); it defaults to
    os.environ.

    """
    print("Content-type: text/html")
    print()
    # From here on, anything written to stderr goes to the same stream as
    # the page itself.
    sys.stderr = sys.stdout
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
        print_environ_usage()
        # The text passed to exec() is not valid Python, so calling f()
        # raises; going through g() adds a second frame to the traceback
        # that print_exception() renders below.
        def f():
            exec("testing print_exception() -- <I>italics?</I>")
        def g(f=f):
            f()
        print("<H3>What follows is a test, not an actual exception:</H3>")
        g()
    except:
        # Deliberately catch everything: the failure is shown on the page.
        print_exception()

    print("<H1>Second try with a small maxlen...</H1>")

    # NOTE(review): maxlen is presumably the request-size cap consulted by
    # FieldStorage -- confirm against the part of the module not shown here.
    global maxlen
    maxlen = 50
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
    except:
        print_exception()
    944 
    945 def print_exception(type=None, value=None, tb=None, limit=None):
    946     if type is None:
    947         type, value, tb = sys.exc_info()
    948     import traceback
    949     print()
    950     print("<H3>Traceback (most recent call last):</H3>")
    951     list = traceback.format_tb(tb, limit) + \
    952            traceback.format_exception_only(type, value)
    953     print("<PRE>%s<B>%s</B></PRE>" % (
    954         html.escape("".join(list[:-1])),
    955         html.escape(list[-1]),
    956         ))
    957     del tb
    958 
    959 def print_environ(environ=os.environ):
    960     """Dump the shell environment as HTML."""
    961     keys = sorted(environ.keys())
    962     print()
    963     print("<H3>Shell Environment:</H3>")
    964     print("<DL>")
    965     for key in keys:
    966         print("<DT>", html.escape(key), "<DD>", html.escape(environ[key]))
    967     print("</DL>")
    968     print()
    969 
    970 def print_form(form):
    971     """Dump the contents of a form as HTML."""
    972     keys = sorted(form.keys())
    973     print()
    974     print("<H3>Form Contents:</H3>")
    975     if not keys:
    976         print("<P>No form fields.")
    977     print("<DL>")
    978     for key in keys:
    979         print("<DT>" + html.escape(key) + ":", end=' ')
    980         value = form[key]
    981         print("<i>" + html.escape(repr(type(value))) + "</i>")
    982         print("<DD>" + html.escape(repr(value)))
    983     print("</DL>")
    984     print()
    985 
    986 def print_directory():
    987     """Dump the current directory as HTML."""
    988     print()
    989     print("<H3>Current Working Directory:</H3>")
    990     try:
    991         pwd = os.getcwd()
    992     except OSError as msg:
    993         print("OSError:", html.escape(str(msg)))
    994     else:
    995         print(html.escape(pwd))
    996     print()
    997 
    998 def print_arguments():
    999     print()
   1000     print("<H3>Command Line Arguments:</H3>")
   1001     print()
   1002     print(sys.argv)
   1003     print()
   1004 
   1005 def print_environ_usage():
   1006     """Dump a list of environment variables used by CGI as HTML."""
   1007     print("""
   1008 <H3>These environment variables could have been set:</H3>
   1009 <UL>
   1010 <LI>AUTH_TYPE
   1011 <LI>CONTENT_LENGTH
   1012 <LI>CONTENT_TYPE
   1013 <LI>DATE_GMT
   1014 <LI>DATE_LOCAL
   1015 <LI>DOCUMENT_NAME
   1016 <LI>DOCUMENT_ROOT
   1017 <LI>DOCUMENT_URI
   1018 <LI>GATEWAY_INTERFACE
   1019 <LI>LAST_MODIFIED
   1020 <LI>PATH
   1021 <LI>PATH_INFO
   1022 <LI>PATH_TRANSLATED
   1023 <LI>QUERY_STRING
   1024 <LI>REMOTE_ADDR
   1025 <LI>REMOTE_HOST
   1026 <LI>REMOTE_IDENT
   1027 <LI>REMOTE_USER
   1028 <LI>REQUEST_METHOD
   1029 <LI>SCRIPT_NAME
   1030 <LI>SERVER_NAME
   1031 <LI>SERVER_PORT
   1032 <LI>SERVER_PROTOCOL
   1033 <LI>SERVER_ROOT
   1034 <LI>SERVER_SOFTWARE
   1035 </UL>
   1036 In addition, HTTP headers sent by the server may be passed in the
   1037 environment as well.  Here are some common variable names:
   1038 <UL>
   1039 <LI>HTTP_ACCEPT
   1040 <LI>HTTP_CONNECTION
   1041 <LI>HTTP_HOST
   1042 <LI>HTTP_PRAGMA
   1043 <LI>HTTP_REFERER
   1044 <LI>HTTP_USER_AGENT
   1045 </UL>
   1046 """)
   1047 
   1048 
   1049 # Utilities
   1050 # =========
   1051 
   1052 def escape(s, quote=None):
   1053     """Deprecated API."""
   1054     warn("cgi.escape is deprecated, use html.escape instead",
   1055          DeprecationWarning, stacklevel=2)
   1056     s = s.replace("&", "&amp;") # Must be done first!
   1057     s = s.replace("<", "&lt;")
   1058     s = s.replace(">", "&gt;")
   1059     if quote:
   1060         s = s.replace('"', "&quot;")
   1061     return s
   1062 
   1063 
   1064 def valid_boundary(s):
   1065     import re
   1066     if isinstance(s, bytes):
   1067         _vb_pattern = b"^[ -~]{0,200}[!-~]$"
   1068     else:
   1069         _vb_pattern = "^[ -~]{0,200}[!-~]$"
   1070     return re.match(_vb_pattern, s)
   1071 
   1072 # Invoke mainline
   1073 # ===============
   1074 
# Run the diagnostic CGI page, test(), when this file is executed as a
# script; importing it as a module does not trigger it.
if __name__ == '__main__':
    test()
   1078