Home | History | Annotate | Download | only in python2.7
      1 """A collection of string operations (most are no longer used).
      2 
      3 Warning: most of the code you see here isn't normally used nowadays.
      4 Beginning with Python 1.6, many of these functions are implemented as
      5 methods on the standard string object. They used to be implemented by
      6 a built-in module called strop, but strop is now obsolete itself.
      7 
      8 Public module variables:
      9 
     10 whitespace -- a string containing all characters considered whitespace
     11 lowercase -- a string containing all characters considered lowercase letters
     12 uppercase -- a string containing all characters considered uppercase letters
     13 letters -- a string containing all characters considered letters
     14 digits -- a string containing all characters considered decimal digits
     15 hexdigits -- a string containing all characters considered hexadecimal digits
     16 octdigits -- a string containing all characters considered octal digits
     17 punctuation -- a string containing all characters considered punctuation
     18 printable -- a string containing all characters considered printable
     19 
     20 """
     21 
     22 # Some strings for ctype-style character classification
     23 whitespace = ' \t\n\r\v\f'
     24 lowercase = 'abcdefghijklmnopqrstuvwxyz'
     25 uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
     26 letters = lowercase + uppercase
     27 ascii_lowercase = lowercase
     28 ascii_uppercase = uppercase
     29 ascii_letters = ascii_lowercase + ascii_uppercase
     30 digits = '0123456789'
     31 hexdigits = digits + 'abcdef' + 'ABCDEF'
     32 octdigits = '01234567'
     33 punctuation = """!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
     34 printable = digits + letters + punctuation + whitespace
     35 
     36 # Case conversion helpers
     37 # Use str to convert Unicode literal in case of -U
     38 l = map(chr, xrange(256))
     39 _idmap = str('').join(l)
     40 del l
     41 
     42 # Functions which aren't available as string methods.
     43 
     44 # Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
     45 def capwords(s, sep=None):
     46     """capwords(s [,sep]) -> string
     47 
     48     Split the argument into words using split, capitalize each
     49     word using capitalize, and join the capitalized words using
     50     join.  If the optional second argument sep is absent or None,
     51     runs of whitespace characters are replaced by a single space
     52     and leading and trailing whitespace are removed, otherwise
     53     sep is used to split and join the words.
     54 
     55     """
     56     return (sep or ' ').join(x.capitalize() for x in s.split(sep))
     57 
     58 
     59 # Construct a translation string
     60 _idmapL = None
     61 def maketrans(fromstr, tostr):
     62     """maketrans(frm, to) -> string
     63 
     64     Return a translation table (a string of 256 bytes long)
     65     suitable for use in string.translate.  The strings frm and to
     66     must be of the same length.
     67 
     68     """
     69     if len(fromstr) != len(tostr):
     70         raise ValueError, "maketrans arguments must have same length"
     71     global _idmapL
     72     if not _idmapL:
     73         _idmapL = list(_idmap)
     74     L = _idmapL[:]
     75     fromstr = map(ord, fromstr)
     76     for i in range(len(fromstr)):
     77         L[fromstr[i]] = tostr[i]
     78     return ''.join(L)
     79 
     80 
     81 
     82 ####################################################################
     83 import re as _re
     84 
     85 class _multimap:
     86     """Helper class for combining multiple mappings.
     87 
     88     Used by .{safe_,}substitute() to combine the mapping and keyword
     89     arguments.
     90     """
     91     def __init__(self, primary, secondary):
     92         self._primary = primary
     93         self._secondary = secondary
     94 
     95     def __getitem__(self, key):
     96         try:
     97             return self._primary[key]
     98         except KeyError:
     99             return self._secondary[key]
    100 
    101 
    102 class _TemplateMetaclass(type):
    103     pattern = r"""
    104     %(delim)s(?:
    105       (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
    106       (?P<named>%(id)s)      |   # delimiter and a Python identifier
    107       {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
    108       (?P<invalid>)              # Other ill-formed delimiter exprs
    109     )
    110     """
    111 
    112     def __init__(cls, name, bases, dct):
    113         super(_TemplateMetaclass, cls).__init__(name, bases, dct)
    114         if 'pattern' in dct:
    115             pattern = cls.pattern
    116         else:
    117             pattern = _TemplateMetaclass.pattern % {
    118                 'delim' : _re.escape(cls.delimiter),
    119                 'id'    : cls.idpattern,
    120                 }
    121         cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)
    122 
    123 
    124 class Template:
    125     """A string class for supporting $-substitutions."""
    126     __metaclass__ = _TemplateMetaclass
    127 
    128     delimiter = '$'
    129     idpattern = r'[_a-z][_a-z0-9]*'
    130 
    131     def __init__(self, template):
    132         self.template = template
    133 
    134     # Search for $$, $identifier, ${identifier}, and any bare $'s
    135 
    136     def _invalid(self, mo):
    137         i = mo.start('invalid')
    138         lines = self.template[:i].splitlines(True)
    139         if not lines:
    140             colno = 1
    141             lineno = 1
    142         else:
    143             colno = i - len(''.join(lines[:-1]))
    144             lineno = len(lines)
    145         raise ValueError('Invalid placeholder in string: line %d, col %d' %
    146                          (lineno, colno))
    147 
    148     def substitute(self, *args, **kws):
    149         if len(args) > 1:
    150             raise TypeError('Too many positional arguments')
    151         if not args:
    152             mapping = kws
    153         elif kws:
    154             mapping = _multimap(kws, args[0])
    155         else:
    156             mapping = args[0]
    157         # Helper function for .sub()
    158         def convert(mo):
    159             # Check the most common path first.
    160             named = mo.group('named') or mo.group('braced')
    161             if named is not None:
    162                 val = mapping[named]
    163                 # We use this idiom instead of str() because the latter will
    164                 # fail if val is a Unicode containing non-ASCII characters.
    165                 return '%s' % (val,)
    166             if mo.group('escaped') is not None:
    167                 return self.delimiter
    168             if mo.group('invalid') is not None:
    169                 self._invalid(mo)
    170             raise ValueError('Unrecognized named group in pattern',
    171                              self.pattern)
    172         return self.pattern.sub(convert, self.template)
    173 
    174     def safe_substitute(self, *args, **kws):
    175         if len(args) > 1:
    176             raise TypeError('Too many positional arguments')
    177         if not args:
    178             mapping = kws
    179         elif kws:
    180             mapping = _multimap(kws, args[0])
    181         else:
    182             mapping = args[0]
    183         # Helper function for .sub()
    184         def convert(mo):
    185             named = mo.group('named')
    186             if named is not None:
    187                 try:
    188                     # We use this idiom instead of str() because the latter
    189                     # will fail if val is a Unicode containing non-ASCII
    190                     return '%s' % (mapping[named],)
    191                 except KeyError:
    192                     return self.delimiter + named
    193             braced = mo.group('braced')
    194             if braced is not None:
    195                 try:
    196                     return '%s' % (mapping[braced],)
    197                 except KeyError:
    198                     return self.delimiter + '{' + braced + '}'
    199             if mo.group('escaped') is not None:
    200                 return self.delimiter
    201             if mo.group('invalid') is not None:
    202                 return self.delimiter
    203             raise ValueError('Unrecognized named group in pattern',
    204                              self.pattern)
    205         return self.pattern.sub(convert, self.template)
    206 
    207 
    208 
    209 ####################################################################
    210 # NOTE: Everything below here is deprecated.  Use string methods instead.
    211 # This stuff will go away in Python 3.0.
    212 
    213 # Backward compatible names for exceptions
    214 index_error = ValueError
    215 atoi_error = ValueError
    216 atof_error = ValueError
    217 atol_error = ValueError
    218 
    219 # convert UPPER CASE letters to lower case
    220 def lower(s):
    221     """lower(s) -> string
    222 
    223     Return a copy of the string s converted to lowercase.
    224 
    225     """
    226     return s.lower()
    227 
    228 # Convert lower case letters to UPPER CASE
    229 def upper(s):
    230     """upper(s) -> string
    231 
    232     Return a copy of the string s converted to uppercase.
    233 
    234     """
    235     return s.upper()
    236 
    237 # Swap lower case letters and UPPER CASE
    238 def swapcase(s):
    239     """swapcase(s) -> string
    240 
    241     Return a copy of the string s with upper case characters
    242     converted to lowercase and vice versa.
    243 
    244     """
    245     return s.swapcase()
    246 
# Strip leading and trailing whitespace (or the given characters)
    248 def strip(s, chars=None):
    249     """strip(s [,chars]) -> string
    250 
    251     Return a copy of the string s with leading and trailing
    252     whitespace removed.
    253     If chars is given and not None, remove characters in chars instead.
    254     If chars is unicode, S will be converted to unicode before stripping.
    255 
    256     """
    257     return s.strip(chars)
    258 
# Strip leading whitespace (or the given characters)
    260 def lstrip(s, chars=None):
    261     """lstrip(s [,chars]) -> string
    262 
    263     Return a copy of the string s with leading whitespace removed.
    264     If chars is given and not None, remove characters in chars instead.
    265 
    266     """
    267     return s.lstrip(chars)
    268 
# Strip trailing whitespace (or the given characters)
    270 def rstrip(s, chars=None):
    271     """rstrip(s [,chars]) -> string
    272 
    273     Return a copy of the string s with trailing whitespace removed.
    274     If chars is given and not None, remove characters in chars instead.
    275 
    276     """
    277     return s.rstrip(chars)
    278 
    279 
    280 # Split a string into a list of space/tab-separated words
    281 def split(s, sep=None, maxsplit=-1):
    282     """split(s [,sep [,maxsplit]]) -> list of strings
    283 
    284     Return a list of the words in the string s, using sep as the
    285     delimiter string.  If maxsplit is given, splits at no more than
    286     maxsplit places (resulting in at most maxsplit+1 words).  If sep
    287     is not specified or is None, any whitespace string is a separator.
    288 
    289     (split and splitfields are synonymous)
    290 
    291     """
    292     return s.split(sep, maxsplit)
    293 splitfields = split
    294 
# Split a string into a list of words, working from the end of the string
    296 def rsplit(s, sep=None, maxsplit=-1):
    297     """rsplit(s [,sep [,maxsplit]]) -> list of strings
    298 
    299     Return a list of the words in the string s, using sep as the
    300     delimiter string, starting at the end of the string and working
    301     to the front.  If maxsplit is given, at most maxsplit splits are
    302     done. If sep is not specified or is None, any whitespace string
    303     is a separator.
    304     """
    305     return s.rsplit(sep, maxsplit)
    306 
    307 # Join fields with optional separator
    308 def join(words, sep = ' '):
    309     """join(list [,sep]) -> string
    310 
    311     Return a string composed of the words in list, with
    312     intervening occurrences of sep.  The default separator is a
    313     single space.
    314 
    315     (joinfields and join are synonymous)
    316 
    317     """
    318     return sep.join(words)
    319 joinfields = join
    320 
    321 # Find substring, raise exception if not found
    322 def index(s, *args):
    323     """index(s, sub [,start [,end]]) -> int
    324 
    325     Like find but raises ValueError when the substring is not found.
    326 
    327     """
    328     return s.index(*args)
    329 
    330 # Find last substring, raise exception if not found
    331 def rindex(s, *args):
    332     """rindex(s, sub [,start [,end]]) -> int
    333 
    334     Like rfind but raises ValueError when the substring is not found.
    335 
    336     """
    337     return s.rindex(*args)
    338 
    339 # Count non-overlapping occurrences of substring
    340 def count(s, *args):
    341     """count(s, sub[, start[,end]]) -> int
    342 
    343     Return the number of occurrences of substring sub in string
    344     s[start:end].  Optional arguments start and end are
    345     interpreted as in slice notation.
    346 
    347     """
    348     return s.count(*args)
    349 
    350 # Find substring, return -1 if not found
    351 def find(s, *args):
    352     """find(s, sub [,start [,end]]) -> in
    353 
    354     Return the lowest index in s where substring sub is found,
    355     such that sub is contained within s[start,end].  Optional
    356     arguments start and end are interpreted as in slice notation.
    357 
    358     Return -1 on failure.
    359 
    360     """
    361     return s.find(*args)
    362 
    363 # Find last substring, return -1 if not found
    364 def rfind(s, *args):
    365     """rfind(s, sub [,start [,end]]) -> int
    366 
    367     Return the highest index in s where substring sub is found,
    368     such that sub is contained within s[start,end].  Optional
    369     arguments start and end are interpreted as in slice notation.
    370 
    371     Return -1 on failure.
    372 
    373     """
    374     return s.rfind(*args)
    375 
    376 # for a bit of speed
    377 _float = float
    378 _int = int
    379 _long = long
    380 
    381 # Convert string to float
    382 def atof(s):
    383     """atof(s) -> float
    384 
    385     Return the floating point number represented by the string s.
    386 
    387     """
    388     return _float(s)
    389 
    390 
    391 # Convert string to integer
    392 def atoi(s , base=10):
    393     """atoi(s [,base]) -> int
    394 
    395     Return the integer represented by the string s in the given
    396     base, which defaults to 10.  The string s must consist of one
    397     or more digits, possibly preceded by a sign.  If base is 0, it
    398     is chosen from the leading characters of s, 0 for octal, 0x or
    399     0X for hexadecimal.  If base is 16, a preceding 0x or 0X is
    400     accepted.
    401 
    402     """
    403     return _int(s, base)
    404 
    405 
    406 # Convert string to long integer
    407 def atol(s, base=10):
    408     """atol(s [,base]) -> long
    409 
    410     Return the long integer represented by the string s in the
    411     given base, which defaults to 10.  The string s must consist
    412     of one or more digits, possibly preceded by a sign.  If base
    413     is 0, it is chosen from the leading characters of s, 0 for
    414     octal, 0x or 0X for hexadecimal.  If base is 16, a preceding
    415     0x or 0X is accepted.  A trailing L or l is not accepted,
    416     unless base is 0.
    417 
    418     """
    419     return _long(s, base)
    420 
    421 
    422 # Left-justify a string
    423 def ljust(s, width, *args):
    424     """ljust(s, width[, fillchar]) -> string
    425 
    426     Return a left-justified version of s, in a field of the
    427     specified width, padded with spaces as needed.  The string is
    428     never truncated.  If specified the fillchar is used instead of spaces.
    429 
    430     """
    431     return s.ljust(width, *args)
    432 
    433 # Right-justify a string
    434 def rjust(s, width, *args):
    435     """rjust(s, width[, fillchar]) -> string
    436 
    437     Return a right-justified version of s, in a field of the
    438     specified width, padded with spaces as needed.  The string is
    439     never truncated.  If specified the fillchar is used instead of spaces.
    440 
    441     """
    442     return s.rjust(width, *args)
    443 
    444 # Center a string
    445 def center(s, width, *args):
    446     """center(s, width[, fillchar]) -> string
    447 
    448     Return a center version of s, in a field of the specified
    449     width. padded with spaces as needed.  The string is never
    450     truncated.  If specified the fillchar is used instead of spaces.
    451 
    452     """
    453     return s.center(width, *args)
    454 
    455 # Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
    456 # Decadent feature: the argument may be a string or a number
    457 # (Use of this is deprecated; it should be a string as with ljust c.s.)
    458 def zfill(x, width):
    459     """zfill(x, width) -> string
    460 
    461     Pad a numeric string x with zeros on the left, to fill a field
    462     of the specified width.  The string x is never truncated.
    463 
    464     """
    465     if not isinstance(x, basestring):
    466         x = repr(x)
    467     return x.zfill(width)
    468 
    469 # Expand tabs in a string.
    470 # Doesn't take non-printing chars into account, but does understand \n.
    471 def expandtabs(s, tabsize=8):
    472     """expandtabs(s [,tabsize]) -> string
    473 
    474     Return a copy of the string s with all tab characters replaced
    475     by the appropriate number of spaces, depending on the current
    476     column, and the tabsize (default 8).
    477 
    478     """
    479     return s.expandtabs(tabsize)
    480 
    481 # Character translation through look-up table.
    482 def translate(s, table, deletions=""):
    483     """translate(s,table [,deletions]) -> string
    484 
    485     Return a copy of the string s, where all characters occurring
    486     in the optional argument deletions are removed, and the
    487     remaining characters have been mapped through the given
    488     translation table, which must be a string of length 256.  The
    489     deletions argument is not allowed for Unicode strings.
    490 
    491     """
    492     if deletions or table is None:
    493         return s.translate(table, deletions)
    494     else:
    495         # Add s[:0] so that if s is Unicode and table is an 8-bit string,
    496         # table is converted to Unicode.  This means that table *cannot*
    497         # be a dictionary -- for that feature, use u.translate() directly.
    498         return s.translate(table + s[:0])
    499 
    500 # Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".
    501 def capitalize(s):
    502     """capitalize(s) -> string
    503 
    504     Return a copy of the string s with only its first character
    505     capitalized.
    506 
    507     """
    508     return s.capitalize()
    509 
    510 # Substring replacement (global)
    511 def replace(s, old, new, maxreplace=-1):
    512     """replace (str, old, new[, maxreplace]) -> string
    513 
    514     Return a copy of string str with all occurrences of substring
    515     old replaced by new. If the optional argument maxreplace is
    516     given, only the first maxreplace occurrences are replaced.
    517 
    518     """
    519     return s.replace(old, new, maxreplace)
    520 
    521 
    522 # Try importing optional built-in module "strop" -- if it exists,
    523 # it redefines some string operations that are 100-1000 times faster.
    524 # It also defines values for whitespace, lowercase and uppercase
    525 # that match <ctype.h>'s definitions.
    526 
    527 try:
    528     from strop import maketrans, lowercase, uppercase, whitespace
    529     letters = lowercase + uppercase
    530 except ImportError:
    531     pass                                          # Use the original versions
    532 
    533 ########################################################################
    534 # the Formatter class
    535 # see PEP 3101 for details and purpose of this class
    536 
    537 # The hard parts are reused from the C implementation.  They're exposed as "_"
    538 # prefixed methods of str and unicode.
    539 
    540 # The overall parser is implemented in str._formatter_parser.
    541 # The field name parser is implemented in str._formatter_field_name_split
    542 
    543 class Formatter(object):
    544     def format(self, format_string, *args, **kwargs):
    545         return self.vformat(format_string, args, kwargs)
    546 
    547     def vformat(self, format_string, args, kwargs):
    548         used_args = set()
    549         result = self._vformat(format_string, args, kwargs, used_args, 2)
    550         self.check_unused_args(used_args, args, kwargs)
    551         return result
    552 
    553     def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
    554         if recursion_depth < 0:
    555             raise ValueError('Max string recursion exceeded')
    556         result = []
    557         for literal_text, field_name, format_spec, conversion in \
    558                 self.parse(format_string):
    559 
    560             # output the literal text
    561             if literal_text:
    562                 result.append(literal_text)
    563 
    564             # if there's a field, output it
    565             if field_name is not None:
    566                 # this is some markup, find the object and do
    567                 #  the formatting
    568 
    569                 # given the field_name, find the object it references
    570                 #  and the argument it came from
    571                 obj, arg_used = self.get_field(field_name, args, kwargs)
    572                 used_args.add(arg_used)
    573 
    574                 # do any conversion on the resulting object
    575                 obj = self.convert_field(obj, conversion)
    576 
    577                 # expand the format spec, if needed
    578                 format_spec = self._vformat(format_spec, args, kwargs,
    579                                             used_args, recursion_depth-1)
    580 
    581                 # format the object and append to the result
    582                 result.append(self.format_field(obj, format_spec))
    583 
    584         return ''.join(result)
    585 
    586 
    587     def get_value(self, key, args, kwargs):
    588         if isinstance(key, (int, long)):
    589             return args[key]
    590         else:
    591             return kwargs[key]
    592 
    593 
    594     def check_unused_args(self, used_args, args, kwargs):
    595         pass
    596 
    597 
    598     def format_field(self, value, format_spec):
    599         return format(value, format_spec)
    600 
    601 
    602     def convert_field(self, value, conversion):
    603         # do any conversion on the resulting object
    604         if conversion is None:
    605             return value
    606         elif conversion == 's':
    607             return str(value)
    608         elif conversion == 'r':
    609             return repr(value)
    610         raise ValueError("Unknown conversion specifier {0!s}".format(conversion))
    611 
    612 
    613     # returns an iterable that contains tuples of the form:
    614     # (literal_text, field_name, format_spec, conversion)
    615     # literal_text can be zero length
    616     # field_name can be None, in which case there's no
    617     #  object to format and output
    618     # if field_name is not None, it is looked up, formatted
    619     #  with format_spec and conversion and then used
    620     def parse(self, format_string):
    621         return format_string._formatter_parser()
    622 
    623 
    624     # given a field_name, find the object it references.
    625     #  field_name:   the field being looked up, e.g. "0.name"
    626     #                 or "lookup[3]"
    627     #  used_args:    a set of which args have been used
    628     #  args, kwargs: as passed in to vformat
    629     def get_field(self, field_name, args, kwargs):
    630         first, rest = field_name._formatter_field_name_split()
    631 
    632         obj = self.get_value(first, args, kwargs)
    633 
    634         # loop through the rest of the field_name, doing
    635         #  getattr or getitem as needed
    636         for is_attr, i in rest:
    637             if is_attr:
    638                 obj = getattr(obj, i)
    639             else:
    640                 obj = obj[i]
    641 
    642         return obj, first
    643