Home | History | Annotate | Download | only in util
      1 # (c) 2005 Clark C. Evans and contributors
      2 # This module is part of the Python Paste Project and is released under
      3 # the MIT License: http://www.opensource.org/licenses/mit-license.php
      4 # Some of this code was funded by: http://prometheusresearch.com
      5 """
      6 Date, Time, and Timespan Parsing Utilities
      7 
      8 This module contains parsing support to create "human friendly"
      9 ``datetime`` object parsing.  The explicit goal of these routines is
     10 to provide a multi-format date/time support not unlike that found in
     11 Microsoft Excel.  In most approaches, the input is very "strict" to
     12 prevent errors -- however, this approach is much more liberal since we
     13 are assuming the user-interface is parroting back the normalized value
     14 and thus the user has immediate feedback if the data is not typed in
     15 correctly.
     16 
     17   ``parse_date`` and ``normalize_date``
     18 
     19      These functions take a value like '9 jan 2007' and returns either an
     20      ``date`` object, or an ISO 8601 formatted date value such
     21      as '2007-01-09'.  There is an option to provide an Oracle database
     22      style output as well, ``09 JAN 2007``, but this is not the default.
     23 
     24      This module always treats '/' delimiters as using US date order
     25      (since the author's clients are US based), hence '1/9/2007' is
     26      January 9th.  Since this module treats the '-' as following
     27      European order this supports both modes of data-entry; together
     28      with immediate parroting back the result to the screen, the author
     29      has found this approach to work well in practice.
     30 
     31   ``parse_time`` and ``normalize_time``
     32 
     33      These functions take a value like '1 pm' and returns either an
     34      ``time`` object, or an ISO 8601 formatted 24h clock time
     35      such as '13:00'.  There is an option to provide for US style time
     36      values, '1:00 PM', however this is not the default.
     37 
     38   ``parse_datetime`` and ``normalize_datetime``
     39 
     40      These functions take a value like '9 jan 2007 at 1 pm' and returns
     41      either an ``datetime`` object, or an ISO 8601 formatted
     42      return (without the T) such as '2007-01-09 13:00'. There is an
     43      option to provide for Oracle / US style, '09 JAN 2007 @ 1:00 PM',
     44      however this is not the default.
     45 
     46   ``parse_timedelta`` and ``normalize_timedelta``
     47 
     48      These functions take a value like '1h 15m' and return either a
     49      ``timedelta`` object, or a 2-decimal fixed-point
     50      numerical value in hours, such as '1.25'.  The rationale is to
     51      support meeting or time-billing lengths, not to be an accurate
     52      representation in milliseconds.  As such not all valid
     53      ``timedelta`` values will have a normalized representation.
     54 
     55 """
     56 from datetime import timedelta, time, date
     57 from time import localtime
     58 
# Names exported by a star-import of this module; the underscore-prefixed
# helpers and lookup tables defined further down are not listed here.
__all__ = ['parse_timedelta', 'normalize_timedelta',
           'parse_time', 'normalize_time',
           'parse_date', 'normalize_date']
     62 
     63 def _number(val):
     64     try:
     65         return int(val)
     66     except:
     67         return None
     68 
     69 #
     70 # timedelta
     71 #
     72 def parse_timedelta(val):
     73     """
     74     returns a ``timedelta`` object, or None
     75     """
     76     if not val:
     77         return None
     78     val = val.lower()
     79     if "." in val:
     80         val = float(val)
     81         return timedelta(hours=int(val), minutes=60*(val % 1.0))
     82     fHour = ("h" in val or ":" in val)
     83     fMin  = ("m" in val or ":" in val)
     84     for noise in "minu:teshour()":
     85         val = val.replace(noise, ' ')
     86     val = val.strip()
     87     val = val.split()
     88     hr = 0.0
     89     mi = 0
     90     val.reverse()
     91     if fHour:
     92         hr = int(val.pop())
     93     if fMin:
     94         mi = int(val.pop())
     95     if len(val) > 0 and not hr:
     96         hr = int(val.pop())
     97     return timedelta(hours=hr, minutes=mi)
     98 
     99 def normalize_timedelta(val):
    100     """
    101     produces a normalized string value of the timedelta
    102 
    103     This module returns a normalized time span value consisting of the
    104     number of hours in fractional form. For example '1h 15min' is
    105     formatted as 01.25.
    106     """
    107     if type(val) == str:
    108         val = parse_timedelta(val)
    109     if not val:
    110         return ''
    111     hr = val.seconds/3600
    112     mn = (val.seconds % 3600)/60
    113     return "%d.%02d" % (hr, mn * 100/60)
    114 
    115 #
    116 # time
    117 #
    118 def parse_time(val):
    119     if not val:
    120         return None
    121     hr = mi = 0
    122     val = val.lower()
    123     amflag = (-1 != val.find('a'))  # set if AM is found
    124     pmflag = (-1 != val.find('p'))  # set if PM is found
    125     for noise in ":amp.":
    126         val = val.replace(noise, ' ')
    127     val = val.split()
    128     if len(val) > 1:
    129         hr = int(val[0])
    130         mi = int(val[1])
    131     else:
    132         val = val[0]
    133         if len(val) < 1:
    134             pass
    135         elif 'now' == val:
    136             tm = localtime()
    137             hr = tm[3]
    138             mi = tm[4]
    139         elif 'noon' == val:
    140             hr = 12
    141         elif len(val) < 3:
    142             hr = int(val)
    143             if not amflag and not pmflag and hr < 7:
    144                 hr += 12
    145         elif len(val) < 5:
    146             hr = int(val[:-2])
    147             mi = int(val[-2:])
    148         else:
    149             hr = int(val[:1])
    150     if amflag and hr >= 12:
    151         hr = hr - 12
    152     if pmflag and hr < 12:
    153         hr = hr + 12
    154     return time(hr, mi)
    155 
    156 def normalize_time(value, ampm):
    157     if not value:
    158         return ''
    159     if type(value) == str:
    160         value = parse_time(value)
    161     if not ampm:
    162         return "%02d:%02d" % (value.hour, value.minute)
    163     hr = value.hour
    164     am = "AM"
    165     if hr < 1 or hr > 23:
    166         hr = 12
    167     elif hr >= 12:
    168         am = "PM"
    169         if hr > 12:
    170             hr = hr - 12
    171     return "%02d:%02d %s" % (hr, value.minute, am)
    172 
    173 #
    174 # Date Processing
    175 #
    176 
    177 _one_day = timedelta(days=1)
    178 
    179 _str2num = {'jan':1, 'feb':2, 'mar':3, 'apr':4,  'may':5, 'jun':6,
    180             'jul':7, 'aug':8, 'sep':9, 'oct':10, 'nov':11, 'dec':12 }
    181 
    182 def _month(val):
    183     for (key, mon) in _str2num.items():
    184         if key in val:
    185             return mon
    186     raise TypeError("unknown month '%s'" % val)
    187 
# Number of days per month number; February is fixed at 28, so leap
# years are not reflected in this table.
_days_in_month = {1: 31, 2: 28, 3: 31, 4: 30, 5: 31, 6: 30,
                  7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31,
                  }
# Month number -> capitalised abbreviation, used by normalize_date for
# its non-ISO output.
_num2str = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun',
            7: 'Jul', 8: 'Aug', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec',
            }
# Weekday abbreviations, Monday first, so that position + 1 equals the
# value returned by date.isoweekday().
_wkdy = ("mon", "tue", "wed", "thu", "fri", "sat", "sun")
    195 
    196 def parse_date(val):
    197     if not(val):
    198         return None
    199     val = val.lower()
    200     now = None
    201 
    202     # optimized check for YYYY-MM-DD
    203     strict = val.split("-")
    204     if len(strict) == 3:
    205         (y, m, d) = strict
    206         if "+" in d:
    207             d = d.split("+")[0]
    208         if " " in d:
    209             d = d.split(" ")[0]
    210         try:
    211             now = date(int(y), int(m), int(d))
    212             val = "xxx" + val[10:]
    213         except ValueError:
    214             pass
    215 
    216     # allow for 'now', 'mon', 'tue', etc.
    217     if not now:
    218         chk = val[:3]
    219         if chk in ('now','tod'):
    220             now = date.today()
    221         elif chk in _wkdy:
    222             now = date.today()
    223             idx = list(_wkdy).index(chk) + 1
    224             while now.isoweekday() != idx:
    225                 now += _one_day
    226 
    227     # allow dates to be modified via + or - /w number of days, so
    228     # that now+3 is three days from now
    229     if now:
    230         tail = val[3:].strip()
    231         tail = tail.replace("+"," +").replace("-"," -")
    232         for item in tail.split():
    233             try:
    234                 days = int(item)
    235             except ValueError:
    236                 pass
    237             else:
    238                 now += timedelta(days=days)
    239         return now
    240 
    241     # ok, standard parsing
    242     yr = mo = dy = None
    243     for noise in ('/', '-', ',', '*'):
    244         val = val.replace(noise, ' ')
    245     for noise in _wkdy:
    246         val = val.replace(noise, ' ')
    247     out = []
    248     last = False
    249     ldig = False
    250     for ch in val:
    251         if ch.isdigit():
    252             if last and not ldig:
    253                out.append(' ')
    254             last = ldig = True
    255         else:
    256             if ldig:
    257                 out.append(' ')
    258                 ldig = False
    259             last = True
    260         out.append(ch)
    261     val = "".join(out).split()
    262     if 3 == len(val):
    263         a = _number(val[0])
    264         b = _number(val[1])
    265         c = _number(val[2])
    266         if len(val[0]) == 4:
    267             yr = a
    268             if b:  # 1999 6 23
    269                 mo = b
    270                 dy = c
    271             else:  # 1999 Jun 23
    272                 mo = _month(val[1])
    273                 dy = c
    274         elif a is not None and a > 0:
    275             yr = c
    276             if len(val[2]) < 4:
    277                 raise TypeError("four digit year required")
    278             if b: # 6 23 1999
    279                 dy = b
    280                 mo = a
    281             else: # 23 Jun 1999
    282                 dy = a
    283                 mo = _month(val[1])
    284         else: # Jun 23, 2000
    285             dy = b
    286             yr = c
    287             if len(val[2]) < 4:
    288                 raise TypeError("four digit year required")
    289             mo = _month(val[0])
    290     elif 2 == len(val):
    291         a = _number(val[0])
    292         b = _number(val[1])
    293         if a is not None and a > 999:
    294             yr = a
    295             dy = 1
    296             if b is not None and b > 0: # 1999 6
    297                 mo = b
    298             else: # 1999 Jun
    299                 mo = _month(val[1])
    300         elif a is not None and a > 0:
    301             if b is not None and b > 999: # 6 1999
    302                 mo = a
    303                 yr = b
    304                 dy = 1
    305             elif b is not None and b > 0: # 6 23
    306                 mo = a
    307                 dy = b
    308             else: # 23 Jun
    309                 dy = a
    310                 mo = _month(val[1])
    311         else:
    312             if b > 999: # Jun 2001
    313                 yr = b
    314                 dy = 1
    315             else:  # Jun 23
    316                 dy = b
    317             mo = _month(val[0])
    318     elif 1 == len(val):
    319         val = val[0]
    320         if not val.isdigit():
    321             mo = _month(val)
    322             if mo is not None:
    323                 dy = 1
    324         else:
    325             v = _number(val)
    326             val = str(v)
    327             if 8 == len(val): # 20010623
    328                 yr = _number(val[:4])
    329                 mo = _number(val[4:6])
    330                 dy = _number(val[6:])
    331             elif len(val) in (3,4):
    332                 if v is not None and v > 1300: # 2004
    333                     yr = v
    334                     mo = 1
    335                     dy = 1
    336                 else:        # 1202
    337                     mo = _number(val[:-2])
    338                     dy = _number(val[-2:])
    339             elif v < 32:
    340                 dy = v
    341             else:
    342                 raise TypeError("four digit year required")
    343     tm = localtime()
    344     if mo is None:
    345         mo = tm[1]
    346     if dy is None:
    347         dy = tm[2]
    348     if yr is None:
    349         yr = tm[0]
    350     return date(yr, mo, dy)
    351 
    352 def normalize_date(val, iso8601=True):
    353     if not val:
    354         return ''
    355     if type(val) == str:
    356         val = parse_date(val)
    357     if iso8601:
    358         return "%4d-%02d-%02d" % (val.year, val.month, val.day)
    359     return "%02d %s %4d" % (val.day, _num2str[val.month], val.year)
    360