Home | History | Annotate | Download | only in Lib
      1 # module 'string' -- A collection of string operations

      2 
      3 # Warning: most of the code you see here isn't normally used nowadays.  With

      4 # Python 1.6, many of these functions are implemented as methods on the

      5 # standard string object. They used to be implemented by a built-in module

      6 # called strop, but strop is now obsolete itself.

      7 
      8 """Common string manipulations.
      9 
     10 Public module variables:
     11 
     12 whitespace -- a string containing all characters considered whitespace
     13 lowercase -- a string containing all characters considered lowercase letters
     14 uppercase -- a string containing all characters considered uppercase letters
     15 letters -- a string containing all characters considered letters
     16 digits -- a string containing all characters considered decimal digits
     17 hexdigits -- a string containing all characters considered hexadecimal digits
     18 octdigits -- a string containing all characters considered octal digits
     19 
     20 """
     21 from warnings import warnpy3k  # helper used only for the import-time notice below
     22 warnpy3k("the stringold module has been removed in Python 3.0", stacklevel=2)  # stacklevel=2: report against the importing frame rather than this line
     23 del warnpy3k  # drop the helper so it is not left in this module's namespace
     24 
     25 # Some strings for ctype-style character classification

     26 whitespace = ' \t\n\r\v\f'
     27 lowercase = 'abcdefghijklmnopqrstuvwxyz'
     28 uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
     29 letters = lowercase + uppercase
     30 digits = '0123456789'
     31 hexdigits = digits + 'abcdef' + 'ABCDEF'
     32 octdigits = '01234567'
     33 
     34 # Case conversion helpers

     35 _idmap = ''
     36 for i in range(256): _idmap = _idmap + chr(i)
     37 del i
     38 
     39 # Backward compatible names for exceptions

     40 index_error = ValueError
     41 atoi_error = ValueError
     42 atof_error = ValueError
     43 atol_error = ValueError
     44 
     45 # convert UPPER CASE letters to lower case

     46 def lower(s):
     47     """lower(s) -> string
     48 
     49     Return a copy of the string s converted to lowercase.
     50 
     51     """
     52     return s.lower()
     53 
     54 # Convert lower case letters to UPPER CASE

     55 def upper(s):
     56     """upper(s) -> string
     57 
     58     Return a copy of the string s converted to uppercase.
     59 
     60     """
     61     return s.upper()
     62 
     63 # Swap lower case letters and UPPER CASE

     64 def swapcase(s):
     65     """swapcase(s) -> string
     66 
     67     Return a copy of the string s with upper case characters
     68     converted to lowercase and vice versa.
     69 
     70     """
     71     return s.swapcase()
     72 
     73 # Strip leading and trailing tabs and spaces

     74 def strip(s):
     75     """strip(s) -> string
     76 
     77     Return a copy of the string s with leading and trailing
     78     whitespace removed.
     79 
     80     """
     81     return s.strip()
     82 
     83 # Strip leading tabs and spaces

     84 def lstrip(s):
     85     """lstrip(s) -> string
     86 
     87     Return a copy of the string s with leading whitespace removed.
     88 
     89     """
     90     return s.lstrip()
     91 
     92 # Strip trailing tabs and spaces

     93 def rstrip(s):
     94     """rstrip(s) -> string
     95 
     96     Return a copy of the string s with trailing whitespace
     97     removed.
     98 
     99     """
    100     return s.rstrip()
    101 
    102 
    103 # Split a string into a list of space/tab-separated words

    104 def split(s, sep=None, maxsplit=0):
    105     """split(str [,sep [,maxsplit]]) -> list of strings
    106 
    107     Return a list of the words in the string s, using sep as the
    108     delimiter string.  If maxsplit is nonzero, splits into at most
    109     maxsplit words If sep is not specified, any whitespace string
    110     is a separator.  Maxsplit defaults to 0.
    111 
    112     (split and splitfields are synonymous)
    113 
    114     """
    115     return s.split(sep, maxsplit)
    116 splitfields = split
    117 
    118 # Join fields with optional separator

    119 def join(words, sep = ' '):
    120     """join(list [,sep]) -> string
    121 
    122     Return a string composed of the words in list, with
    123     intervening occurrences of sep.  The default separator is a
    124     single space.
    125 
    126     (joinfields and join are synonymous)
    127 
    128     """
    129     return sep.join(words)
    130 joinfields = join
    131 
    132 # for a little bit of speed

    133 _apply = apply  # module-global alias of the builtin apply()
    134 
    135 # Find substring, raise exception if not found

    136 def index(s, *args):
    137     """index(s, sub [,start [,end]]) -> int
    138 
    139     Like find but raises ValueError when the substring is not found.
    140 
    141     """
    142     return _apply(s.index, args)
    143 
    144 # Find last substring, raise exception if not found

    145 def rindex(s, *args):
    146     """rindex(s, sub [,start [,end]]) -> int
    147 
    148     Like rfind but raises ValueError when the substring is not found.
    149 
    150     """
    151     return _apply(s.rindex, args)
    152 
    153 # Count non-overlapping occurrences of substring

    154 def count(s, *args):
    155     """count(s, sub[, start[,end]]) -> int
    156 
    157     Return the number of occurrences of substring sub in string
    158     s[start:end].  Optional arguments start and end are
    159     interpreted as in slice notation.
    160 
    161     """
    162     return _apply(s.count, args)
    163 
    164 # Find substring, return -1 if not found

    165 def find(s, *args):
    166     """find(s, sub [,start [,end]]) -> in
    167 
    168     Return the lowest index in s where substring sub is found,
    169     such that sub is contained within s[start,end].  Optional
    170     arguments start and end are interpreted as in slice notation.
    171 
    172     Return -1 on failure.
    173 
    174     """
    175     return _apply(s.find, args)
    176 
    177 # Find last substring, return -1 if not found

    178 def rfind(s, *args):
    179     """rfind(s, sub [,start [,end]]) -> int
    180 
    181     Return the highest index in s where substring sub is found,
    182     such that sub is contained within s[start,end].  Optional
    183     arguments start and end are interpreted as in slice notation.
    184 
    185     Return -1 on failure.
    186 
    187     """
    188     return _apply(s.rfind, args)
    189 
    190 # for a bit of speed

    191 _float = float  # module-global alias of the builtin float
    192 _int = int  # module-global alias of the builtin int
    193 _long = long  # module-global alias of the builtin long
    194 _StringType = type('')  # type object of the '' literal
    195 
    196 # Convert string to float

    197 def atof(s):
    198     """atof(s) -> float
    199 
    200     Return the floating point number represented by the string s.
    201 
    202     """
    203     if type(s) == _StringType:
    204         return _float(s)
    205     else:
    206         raise TypeError('argument 1: expected string, %s found' %
    207                         type(s).__name__)
    208 
    209 # Convert string to integer

    210 def atoi(*args):
    211     """atoi(s [,base]) -> int
    212 
    213     Return the integer represented by the string s in the given
    214     base, which defaults to 10.  The string s must consist of one
    215     or more digits, possibly preceded by a sign.  If base is 0, it
    216     is chosen from the leading characters of s, 0 for octal, 0x or
    217     0X for hexadecimal.  If base is 16, a preceding 0x or 0X is
    218     accepted.
    219 
    220     """
    221     try:
    222         s = args[0]
    223     except IndexError:
    224         raise TypeError('function requires at least 1 argument: %d given' %
    225                         len(args))
    226     # Don't catch type error resulting from too many arguments to int().  The

    227     # error message isn't compatible but the error type is, and this function

    228     # is complicated enough already.

    229     if type(s) == _StringType:
    230         return _apply(_int, args)
    231     else:
    232         raise TypeError('argument 1: expected string, %s found' %
    233                         type(s).__name__)
    234 
    235 
    236 # Convert string to long integer

    237 def atol(*args):
    238     """atol(s [,base]) -> long
    239 
    240     Return the long integer represented by the string s in the
    241     given base, which defaults to 10.  The string s must consist
    242     of one or more digits, possibly preceded by a sign.  If base
    243     is 0, it is chosen from the leading characters of s, 0 for
    244     octal, 0x or 0X for hexadecimal.  If base is 16, a preceding
    245     0x or 0X is accepted.  A trailing L or l is not accepted,
    246     unless base is 0.
    247 
    248     """
    249     try:
    250         s = args[0]
    251     except IndexError:
    252         raise TypeError('function requires at least 1 argument: %d given' %
    253                         len(args))
    254     # Don't catch type error resulting from too many arguments to long().  The

    255     # error message isn't compatible but the error type is, and this function

    256     # is complicated enough already.

    257     if type(s) == _StringType:
    258         return _apply(_long, args)
    259     else:
    260         raise TypeError('argument 1: expected string, %s found' %
    261                         type(s).__name__)
    262 
    263 
    264 # Left-justify a string

    265 def ljust(s, width):
    266     """ljust(s, width) -> string
    267 
    268     Return a left-justified version of s, in a field of the
    269     specified width, padded with spaces as needed.  The string is
    270     never truncated.
    271 
    272     """
    273     n = width - len(s)
    274     if n <= 0: return s
    275     return s + ' '*n
    276 
    277 # Right-justify a string

    278 def rjust(s, width):
    279     """rjust(s, width) -> string
    280 
    281     Return a right-justified version of s, in a field of the
    282     specified width, padded with spaces as needed.  The string is
    283     never truncated.
    284 
    285     """
    286     n = width - len(s)
    287     if n <= 0: return s
    288     return ' '*n + s
    289 
    290 # Center a string

    291 def center(s, width):
    292     """center(s, width) -> string
    293 
    294     Return a center version of s, in a field of the specified
    295     width. padded with spaces as needed.  The string is never
    296     truncated.
    297 
    298     """
    299     n = width - len(s)
    300     if n <= 0: return s
    301     half = n/2
    302     if n%2 and width%2:
    303         # This ensures that center(center(s, i), j) = center(s, j)

    304         half = half+1
    305     return ' '*half +  s + ' '*(n-half)
    306 
    307 # Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'

    308 # Decadent feature: the argument may be a string or a number

    309 # (Use of this is deprecated; it should be a string as with ljust c.s.)

    310 def zfill(x, width):
    311     """zfill(x, width) -> string
    312 
    313     Pad a numeric string x with zeros on the left, to fill a field
    314     of the specified width.  The string x is never truncated.
    315 
    316     """
    317     if type(x) == type(''): s = x
    318     else: s = repr(x)
    319     n = len(s)
    320     if n >= width: return s
    321     sign = ''
    322     if s[0] in ('-', '+'):
    323         sign, s = s[0], s[1:]
    324     return sign + '0'*(width-n) + s
    325 
    326 # Expand tabs in a string.

    327 # Doesn't take non-printing chars into account, but does understand \n.

    328 def expandtabs(s, tabsize=8):
    329     """expandtabs(s [,tabsize]) -> string
    330 
    331     Return a copy of the string s with all tab characters replaced
    332     by the appropriate number of spaces, depending on the current
    333     column, and the tabsize (default 8).
    334 
    335     """
    336     res = line = ''
    337     for c in s:
    338         if c == '\t':
    339             c = ' '*(tabsize - len(line) % tabsize)
    340         line = line + c
    341         if c == '\n':
    342             res = res + line
    343             line = ''
    344     return res + line
    345 
    346 # Character translation through look-up table.

    347 def translate(s, table, deletions=""):
    348     """translate(s,table [,deletechars]) -> string
    349 
    350     Return a copy of the string s, where all characters occurring
    351     in the optional argument deletechars are removed, and the
    352     remaining characters have been mapped through the given
    353     translation table, which must be a string of length 256.
    354 
    355     """
    356     return s.translate(table, deletions)
    357 
    358 # Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".

    359 def capitalize(s):
    360     """capitalize(s) -> string
    361 
    362     Return a copy of the string s with only its first character
    363     capitalized.
    364 
    365     """
    366     return s.capitalize()
    367 
    368 # Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".

    369 def capwords(s, sep=None):
    370     """capwords(s, [sep]) -> string
    371 
    372     Split the argument into words using split, capitalize each
    373     word using capitalize, and join the capitalized words using
    374     join. Note that this replaces runs of whitespace characters by
    375     a single space.
    376 
    377     """
    378     return join(map(capitalize, s.split(sep)), sep or ' ')
    379 
    380 # Construct a translation string

    381 _idmapL = None
    382 def maketrans(fromstr, tostr):
    383     """maketrans(frm, to) -> string
    384 
    385     Return a translation table (a string of 256 bytes long)
    386     suitable for use in string.translate.  The strings frm and to
    387     must be of the same length.
    388 
    389     """
    390     if len(fromstr) != len(tostr):
    391         raise ValueError, "maketrans arguments must have same length"
    392     global _idmapL
    393     if not _idmapL:
    394         _idmapL = list(_idmap)
    395     L = _idmapL[:]
    396     fromstr = map(ord, fromstr)
    397     for i in range(len(fromstr)):
    398         L[fromstr[i]] = tostr[i]
    399     return join(L, "")
    400 
    401 # Substring replacement (global)

    402 def replace(s, old, new, maxsplit=0):
    403     """replace (str, old, new[, maxsplit]) -> string
    404 
    405     Return a copy of string str with all occurrences of substring
    406     old replaced by new. If the optional argument maxsplit is
    407     given, only the first maxsplit occurrences are replaced.
    408 
    409     """
    410     return s.replace(old, new, maxsplit)
    411 
    412 
    413 # XXX: transitional

    414 #

    415 # If string objects do not have methods, then we need to use the old string.py

    416 # library, which uses strop for many more things than just the few outlined

    417 # below.

    418 try:
    419     ''.upper  # attribute probe only: checks that string objects have methods
    420 except AttributeError:
    421     from stringold import *  # NOTE(review): the deprecation message near the top of this file names this module "stringold", so this looks like a self-import -- confirm
    422 
    423 # Try importing optional built-in module "strop" -- if it exists,

    424 # it redefines some string operations that are 100-1000 times faster.

    425 # It also defines values for whitespace, lowercase and uppercase

    426 # that match <ctype.h>'s definitions.

    427 
    428 try:
    429     from strop import maketrans, lowercase, uppercase, whitespace
    430     letters = lowercase + uppercase
    431 except ImportError:
    432     pass                                          # Use the original versions

    433