Home | History | Annotate | Download | only in parser
      1 # -*- coding: utf-8 -*-
      2 """
      3 This module offers a generic date/time string parser which is able to parse
      4 most known formats to represent a date and/or time.
      5 
      6 This module attempts to be forgiving with regards to unlikely input formats,
      7 returning a datetime object even for dates which are ambiguous. If an element
      8 of a date/time stamp is omitted, the following rules are applied:
      9 
     10 - If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour
     11   on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is
     12   specified.
     13 - If a time zone is omitted, a timezone-naive datetime is returned.
     14 
     15 If any other elements are missing, they are taken from the
     16 :class:`datetime.datetime` object passed to the parameter ``default``. If this
     17 results in a day number exceeding the valid number of days per month, the
     18 value falls back to the end of the month.
     19 
     20 Additional resources about date/time string formats can be found below:
     21 
     22 - `A summary of the international standard date and time notation
     23   <http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
     24 - `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_
     25 - `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_
     26 - `CPAN ParseDate module
     27   <http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
     28 - `Java SimpleDateFormat Class
     29   <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_
     30 """
     31 from __future__ import unicode_literals
     32 
     33 import datetime
     34 import re
     35 import string
     36 import time
     37 import warnings
     38 
     39 from calendar import monthrange
     40 from io import StringIO
     41 
     42 import six
     43 from six import binary_type, integer_types, text_type
     44 
     45 from decimal import Decimal
     46 
     47 from warnings import warn
     48 
     49 from .. import relativedelta
     50 from .. import tz
     51 
     52 __all__ = ["parse", "parserinfo"]
     53 
     54 
     55 # TODO: pandas.core.tools.datetimes imports this explicitly.  Might be worth
     56 # making public and/or figuring out if there is something we can
     57 # take off their plate.
     58 class _timelex(object):
     59     # Fractional seconds are sometimes split by a comma
     60     _split_decimal = re.compile("([.,])")
     61 
     62     def __init__(self, instream):
     63         if six.PY2:
     64             # In Python 2, we can't duck type properly because unicode has
     65             # a 'decode' function, and we'd be double-decoding
     66             if isinstance(instream, (binary_type, bytearray)):
     67                 instream = instream.decode()
     68         else:
     69             if getattr(instream, 'decode', None) is not None:
     70                 instream = instream.decode()
     71 
     72         if isinstance(instream, text_type):
     73             instream = StringIO(instream)
     74         elif getattr(instream, 'read', None) is None:
     75             raise TypeError('Parser must be a string or character stream, not '
     76                             '{itype}'.format(itype=instream.__class__.__name__))
     77 
     78         self.instream = instream
     79         self.charstack = []
     80         self.tokenstack = []
     81         self.eof = False
     82 
     83     def get_token(self):
     84         """
     85         This function breaks the time string into lexical units (tokens), which
     86         can be parsed by the parser. Lexical units are demarcated by changes in
     87         the character set, so any continuous string of letters is considered
     88         one unit, any continuous string of numbers is considered one unit.
     89 
     90         The main complication arises from the fact that dots ('.') can be used
     91         both as separators (e.g. "Sep.20.2009") or decimal points (e.g.
     92         "4:30:21.447"). As such, it is necessary to read the full context of
     93         any dot-separated strings before breaking it into tokens; as such, this
     94         function maintains a "token stack", for when the ambiguous context
     95         demands that multiple tokens be parsed at once.
     96         """
     97         if self.tokenstack:
     98             return self.tokenstack.pop(0)
     99 
    100         seenletters = False
    101         token = None
    102         state = None
    103 
    104         while not self.eof:
    105             # We only realize that we've reached the end of a token when we
    106             # find a character that's not part of the current token - since
    107             # that character may be part of the next token, it's stored in the
    108             # charstack.
    109             if self.charstack:
    110                 nextchar = self.charstack.pop(0)
    111             else:
    112                 nextchar = self.instream.read(1)
    113                 while nextchar == '\x00':
    114                     nextchar = self.instream.read(1)
    115 
    116             if not nextchar:
    117                 self.eof = True
    118                 break
    119             elif not state:
    120                 # First character of the token - determines if we're starting
    121                 # to parse a word, a number or something else.
    122                 token = nextchar
    123                 if self.isword(nextchar):
    124                     state = 'a'
    125                 elif self.isnum(nextchar):
    126                     state = '0'
    127                 elif self.isspace(nextchar):
    128                     token = ' '
    129                     break  # emit token
    130                 else:
    131                     break  # emit token
    132             elif state == 'a':
    133                 # If we've already started reading a word, we keep reading
    134                 # letters until we find something that's not part of a word.
    135                 seenletters = True
    136                 if self.isword(nextchar):
    137                     token += nextchar
    138                 elif nextchar == '.':
    139                     token += nextchar
    140                     state = 'a.'
    141                 else:
    142                     self.charstack.append(nextchar)
    143                     break  # emit token
    144             elif state == '0':
    145                 # If we've already started reading a number, we keep reading
    146                 # numbers until we find something that doesn't fit.
    147                 if self.isnum(nextchar):
    148                     token += nextchar
    149                 elif nextchar == '.' or (nextchar == ',' and len(token) >= 2):
    150                     token += nextchar
    151                     state = '0.'
    152                 else:
    153                     self.charstack.append(nextchar)
    154                     break  # emit token
    155             elif state == 'a.':
    156                 # If we've seen some letters and a dot separator, continue
    157                 # parsing, and the tokens will be broken up later.
    158                 seenletters = True
    159                 if nextchar == '.' or self.isword(nextchar):
    160                     token += nextchar
    161                 elif self.isnum(nextchar) and token[-1] == '.':
    162                     token += nextchar
    163                     state = '0.'
    164                 else:
    165                     self.charstack.append(nextchar)
    166                     break  # emit token
    167             elif state == '0.':
    168                 # If we've seen at least one dot separator, keep going, we'll
    169                 # break up the tokens later.
    170                 if nextchar == '.' or self.isnum(nextchar):
    171                     token += nextchar
    172                 elif self.isword(nextchar) and token[-1] == '.':
    173                     token += nextchar
    174                     state = 'a.'
    175                 else:
    176                     self.charstack.append(nextchar)
    177                     break  # emit token
    178 
    179         if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or
    180                                        token[-1] in '.,')):
    181             l = self._split_decimal.split(token)
    182             token = l[0]
    183             for tok in l[1:]:
    184                 if tok:
    185                     self.tokenstack.append(tok)
    186 
    187         if state == '0.' and token.count('.') == 0:
    188             token = token.replace(',', '.')
    189 
    190         return token
    191 
    192     def __iter__(self):
    193         return self
    194 
    195     def __next__(self):
    196         token = self.get_token()
    197         if token is None:
    198             raise StopIteration
    199 
    200         return token
    201 
    202     def next(self):
    203         return self.__next__()  # Python 2.x support
    204 
    205     @classmethod
    206     def split(cls, s):
    207         return list(cls(s))
    208 
    209     @classmethod
    210     def isword(cls, nextchar):
    211         """ Whether or not the next character is part of a word """
    212         return nextchar.isalpha()
    213 
    214     @classmethod
    215     def isnum(cls, nextchar):
    216         """ Whether the next character is part of a number """
    217         return nextchar.isdigit()
    218 
    219     @classmethod
    220     def isspace(cls, nextchar):
    221         """ Whether the next character is whitespace """
    222         return nextchar.isspace()
    223 
    224 
    225 class _resultbase(object):
    226 
    227     def __init__(self):
    228         for attr in self.__slots__:
    229             setattr(self, attr, None)
    230 
    231     def _repr(self, classname):
    232         l = []
    233         for attr in self.__slots__:
    234             value = getattr(self, attr)
    235             if value is not None:
    236                 l.append("%s=%s" % (attr, repr(value)))
    237         return "%s(%s)" % (classname, ", ".join(l))
    238 
    239     def __len__(self):
    240         return (sum(getattr(self, attr) is not None
    241                     for attr in self.__slots__))
    242 
    243     def __repr__(self):
    244         return self._repr(self.__class__.__name__)
    245 
    246 
    247 class parserinfo(object):
    248     """
    249     Class which handles what inputs are accepted. Subclass this to customize
    250     the language and acceptable values for each parameter.
    251 
    252     :param dayfirst:
    253         Whether to interpret the first value in an ambiguous 3-integer date
    254         (e.g. 01/05/09) as the day (``True``) or month (``False``). If
    255         ``yearfirst`` is set to ``True``, this distinguishes between YDM
    256         and YMD. Default is ``False``.
    257 
    258     :param yearfirst:
    259         Whether to interpret the first value in an ambiguous 3-integer date
    260         (e.g. 01/05/09) as the year. If ``True``, the first number is taken
    261         to be the year, otherwise the last number is taken to be the year.
    262         Default is ``False``.
    263     """
    264 
    265     # m from a.m/p.m, t from ISO T separator
    266     JUMP = [" ", ".", ",", ";", "-", "/", "'",
    267             "at", "on", "and", "ad", "m", "t", "of",
    268             "st", "nd", "rd", "th"]
    269 
    270     WEEKDAYS = [("Mon", "Monday"),
    271                 ("Tue", "Tuesday"),     # TODO: "Tues"
    272                 ("Wed", "Wednesday"),
    273                 ("Thu", "Thursday"),    # TODO: "Thurs"
    274                 ("Fri", "Friday"),
    275                 ("Sat", "Saturday"),
    276                 ("Sun", "Sunday")]
    277     MONTHS = [("Jan", "January"),
    278               ("Feb", "February"),      # TODO: "Febr"
    279               ("Mar", "March"),
    280               ("Apr", "April"),
    281               ("May", "May"),
    282               ("Jun", "June"),
    283               ("Jul", "July"),
    284               ("Aug", "August"),
    285               ("Sep", "Sept", "September"),
    286               ("Oct", "October"),
    287               ("Nov", "November"),
    288               ("Dec", "December")]
    289     HMS = [("h", "hour", "hours"),
    290            ("m", "minute", "minutes"),
    291            ("s", "second", "seconds")]
    292     AMPM = [("am", "a"),
    293             ("pm", "p")]
    294     UTCZONE = ["UTC", "GMT", "Z"]
    295     PERTAIN = ["of"]
    296     TZOFFSET = {}
    297     # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
    298     #              "Anno Domini", "Year of Our Lord"]
    299 
    300     def __init__(self, dayfirst=False, yearfirst=False):
    301         self._jump = self._convert(self.JUMP)
    302         self._weekdays = self._convert(self.WEEKDAYS)
    303         self._months = self._convert(self.MONTHS)
    304         self._hms = self._convert(self.HMS)
    305         self._ampm = self._convert(self.AMPM)
    306         self._utczone = self._convert(self.UTCZONE)
    307         self._pertain = self._convert(self.PERTAIN)
    308 
    309         self.dayfirst = dayfirst
    310         self.yearfirst = yearfirst
    311 
    312         self._year = time.localtime().tm_year
    313         self._century = self._year // 100 * 100
    314 
    315     def _convert(self, lst):
    316         dct = {}
    317         for i, v in enumerate(lst):
    318             if isinstance(v, tuple):
    319                 for v in v:
    320                     dct[v.lower()] = i
    321             else:
    322                 dct[v.lower()] = i
    323         return dct
    324 
    325     def jump(self, name):
    326         return name.lower() in self._jump
    327 
    328     def weekday(self, name):
    329         try:
    330             return self._weekdays[name.lower()]
    331         except KeyError:
    332             pass
    333         return None
    334 
    335     def month(self, name):
    336         try:
    337             return self._months[name.lower()] + 1
    338         except KeyError:
    339             pass
    340         return None
    341 
    342     def hms(self, name):
    343         try:
    344             return self._hms[name.lower()]
    345         except KeyError:
    346             return None
    347 
    348     def ampm(self, name):
    349         try:
    350             return self._ampm[name.lower()]
    351         except KeyError:
    352             return None
    353 
    354     def pertain(self, name):
    355         return name.lower() in self._pertain
    356 
    357     def utczone(self, name):
    358         return name.lower() in self._utczone
    359 
    360     def tzoffset(self, name):
    361         if name in self._utczone:
    362             return 0
    363 
    364         return self.TZOFFSET.get(name)
    365 
    366     def convertyear(self, year, century_specified=False):
    367         if year < 100 and not century_specified:
    368             year += self._century
    369             if abs(year - self._year) >= 50:
    370                 if year < self._year:
    371                     year += 100
    372                 else:
    373                     year -= 100
    374         return year
    375 
    376     def validate(self, res):
    377         # move to info
    378         if res.year is not None:
    379             res.year = self.convertyear(res.year, res.century_specified)
    380 
    381         if res.tzoffset == 0 and not res.tzname or res.tzname == 'Z':
    382             res.tzname = "UTC"
    383             res.tzoffset = 0
    384         elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
    385             res.tzoffset = 0
    386         return True
    387 
    388 
    389 class _ymd(list):
    390     def __init__(self, *args, **kwargs):
    391         super(self.__class__, self).__init__(*args, **kwargs)
    392         self.century_specified = False
    393         self.dstridx = None
    394         self.mstridx = None
    395         self.ystridx = None
    396 
    397     @property
    398     def has_year(self):
    399         return self.ystridx is not None
    400 
    401     @property
    402     def has_month(self):
    403         return self.mstridx is not None
    404 
    405     @property
    406     def has_day(self):
    407         return self.dstridx is not None
    408 
    409     def could_be_day(self, value):
    410         if self.has_day:
    411             return False
    412         elif not self.has_month:
    413             return 1 <= value <= 31
    414         elif not self.has_year:
    415             # Be permissive, assume leapyear
    416             month = self[self.mstridx]
    417             return 1 <= value <= monthrange(2000, month)[1]
    418         else:
    419             month = self[self.mstridx]
    420             year = self[self.ystridx]
    421             return 1 <= value <= monthrange(year, month)[1]
    422 
    423     def append(self, val, label=None):
    424         if hasattr(val, '__len__'):
    425             if val.isdigit() and len(val) > 2:
    426                 self.century_specified = True
    427                 if label not in [None, 'Y']:  # pragma: no cover
    428                     raise ValueError(label)
    429                 label = 'Y'
    430         elif val > 100:
    431             self.century_specified = True
    432             if label not in [None, 'Y']:  # pragma: no cover
    433                 raise ValueError(label)
    434             label = 'Y'
    435 
    436         super(self.__class__, self).append(int(val))
    437 
    438         if label == 'M':
    439             if self.has_month:
    440                 raise ValueError('Month is already set')
    441             self.mstridx = len(self) - 1
    442         elif label == 'D':
    443             if self.has_day:
    444                 raise ValueError('Day is already set')
    445             self.dstridx = len(self) - 1
    446         elif label == 'Y':
    447             if self.has_year:
    448                 raise ValueError('Year is already set')
    449             self.ystridx = len(self) - 1
    450 
    451     def resolve_ymd(self, yearfirst, dayfirst):
    452         len_ymd = len(self)
    453         year, month, day = (None, None, None)
    454 
    455         mstridx = self.mstridx
    456 
    457         if len_ymd > 3:
    458             raise ValueError("More than three YMD values")
    459         elif len_ymd == 1 or (mstridx is not None and len_ymd == 2):
    460             # One member, or two members with a month string
    461             if mstridx is not None:
    462                 month = self[mstridx]
    463                 del self[mstridx]
    464 
    465             if len_ymd > 1 or mstridx is None:
    466                 if self[0] > 31:
    467                     year = self[0]
    468                 else:
    469                     day = self[0]
    470 
    471         elif len_ymd == 2:
    472             # Two members with numbers
    473             if self[0] > 31:
    474                 # 99-01
    475                 year, month = self
    476             elif self[1] > 31:
    477                 # 01-99
    478                 month, year = self
    479             elif dayfirst and self[1] <= 12:
    480                 # 13-01
    481                 day, month = self
    482             else:
    483                 # 01-13
    484                 month, day = self
    485 
    486         elif len_ymd == 3:
    487             # Three members
    488             if mstridx == 0:
    489                 if self[1] > 31:
    490                     # Apr-2003-25
    491                     month, year, day = self
    492                 else:
    493                     month, day, year = self
    494             elif mstridx == 1:
    495                 if self[0] > 31 or (yearfirst and self[2] <= 31):
    496                     # 99-Jan-01
    497                     year, month, day = self
    498                 else:
    499                     # 01-Jan-01
    500                     # Give precendence to day-first, since
    501                     # two-digit years is usually hand-written.
    502                     day, month, year = self
    503 
    504             elif mstridx == 2:
    505                 # WTF!?
    506                 if self[1] > 31:
    507                     # 01-99-Jan
    508                     day, year, month = self
    509                 else:
    510                     # 99-01-Jan
    511                     year, day, month = self
    512 
    513             else:
    514                 if (self[0] > 31 or
    515                     self.ystridx == 0 or
    516                         (yearfirst and self[1] <= 12 and self[2] <= 31)):
    517                     # 99-01-01
    518                     if dayfirst and self[2] <= 12:
    519                         year, day, month = self
    520                     else:
    521                         year, month, day = self
    522                 elif self[0] > 12 or (dayfirst and self[1] <= 12):
    523                     # 13-01-01
    524                     day, month, year = self
    525                 else:
    526                     # 01-13-01
    527                     month, day, year = self
    528 
    529         return year, month, day
    530 
    531 
    532 class parser(object):
    533     def __init__(self, info=None):
    534         self.info = info or parserinfo()
    535 
    def parse(self, timestr, default=None,
              ignoretz=False, tzinfos=None, **kwargs):
        """
        Parse the date/time string into a :class:`datetime.datetime` object.

        :param timestr:
            Any date/time string using the supported formats.

        :param default:
            A datetime object supplying the elements which ``timestr`` does
            not specify. When ``None``, the current date at midnight is
            used.

        :param ignoretz:
            If set ``True``, time zones in parsed strings are ignored and a
            naive :class:`datetime.datetime` object is returned.

        :param tzinfos:
            Additional time zone names / aliases which may be present in the
            string. This argument maps time zone names (and optionally offsets
            from those time zones) to time zones. This parameter can be a
            dictionary with timezone aliases mapping time zone names to time
            zones or a function taking two parameters (``tzname`` and
            ``tzoffset``) and returning a time zone.

            The timezones to which the names are mapped can be an integer
            offset from UTC in seconds or a :class:`tzinfo` object.

            .. doctest::
               :options: +NORMALIZE_WHITESPACE

                >>> from dateutil.parser import parse
                >>> from dateutil.tz import gettz
                >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
                >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
                datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
                >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
                datetime.datetime(2012, 1, 19, 17, 21,
                                  tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

            This parameter is ignored if ``ignoretz`` is set.

        :param \\*\\*kwargs:
            Keyword arguments as passed to ``_parse()``.

        :return:
            Returns a :class:`datetime.datetime` object or, if the
            ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
            first element being a :class:`datetime.datetime` object, the second
            a tuple containing the fuzzy tokens.

        :raises ValueError:
            Raised for invalid or unknown string format, if the provided
            :class:`tzinfo` is not in a valid format, or if an invalid date
            would be created.

        :raises TypeError:
            Raised for non-string or character stream input.

        :raises OverflowError:
            Raised if the parsed date exceeds the largest valid C integer on
            your system.
        """

        if default is None:
            # Missing elements fall back to today at 00:00:00.
            now = datetime.datetime.now()
            default = now.replace(hour=0, minute=0, second=0, microsecond=0)

        res, skipped_tokens = self._parse(timestr, **kwargs)

        if res is None:
            raise ValueError("Unknown string format:", timestr)

        if len(res) == 0:
            # Parsing succeeded but no field at all was recognised.
            raise ValueError("String does not contain a date:", timestr)

        naive = self._build_naive(res, default)
        if ignoretz:
            result = naive
        else:
            result = self._build_tzaware(naive, res, tzinfos)

        if not kwargs.get('fuzzy_with_tokens', False):
            return result

        return result, skipped_tokens
    621 
    class _result(_resultbase):
        # Record of the fields recognised in a date/time string. The slot
        # names are the attributes which ``_resultbase`` initialises to
        # ``None`` and which it counts and prints once they have been set.
        __slots__ = ["year", "month", "day", "weekday",
                     "hour", "minute", "second", "microsecond",
                     "tzname", "tzoffset", "ampm","any_unused_tokens"]
    626 
    627     def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
    628                fuzzy_with_tokens=False):
    629         """
    630         Private method which performs the heavy lifting of parsing, called from
    631         ``parse()``, which passes on its ``kwargs`` to this function.
    632 
    633         :param timestr:
    634             The string to parse.
    635 
    636         :param dayfirst:
    637             Whether to interpret the first value in an ambiguous 3-integer date
    638             (e.g. 01/05/09) as the day (``True``) or month (``False``). If
    639             ``yearfirst`` is set to ``True``, this distinguishes between YDM
    640             and YMD. If set to ``None``, this value is retrieved from the
    641             current :class:`parserinfo` object (which itself defaults to
    642             ``False``).
    643 
    644         :param yearfirst:
    645             Whether to interpret the first value in an ambiguous 3-integer date
    646             (e.g. 01/05/09) as the year. If ``True``, the first number is taken
    647             to be the year, otherwise the last number is taken to be the year.
    648             If this is set to ``None``, the value is retrieved from the current
    649             :class:`parserinfo` object (which itself defaults to ``False``).
    650 
    651         :param fuzzy:
    652             Whether to allow fuzzy parsing, allowing for string like "Today is
    653             January 1, 2047 at 8:21:00AM".
    654 
    655         :param fuzzy_with_tokens:
    656             If ``True``, ``fuzzy`` is automatically set to True, and the parser
    657             will return a tuple where the first element is the parsed
    658             :class:`datetime.datetime` datetimestamp and the second element is
    659             a tuple containing the portions of the string which were ignored:
    660 
    661             .. doctest::
    662 
    663                 >>> from dateutil.parser import parse
    664                 >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
    665                 (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))
    666 
    667         """
    668         if fuzzy_with_tokens:
    669             fuzzy = True
    670 
    671         info = self.info
    672 
    673         if dayfirst is None:
    674             dayfirst = info.dayfirst
    675 
    676         if yearfirst is None:
    677             yearfirst = info.yearfirst
    678 
    679         res = self._result()
    680         l = _timelex.split(timestr)         # Splits the timestr into tokens
    681 
    682         skipped_idxs = []
    683 
    684         # year/month/day list
    685         ymd = _ymd()
    686 
    687         len_l = len(l)
    688         i = 0
    689         try:
    690             while i < len_l:
    691 
    692                 # Check if it's a number
    693                 value_repr = l[i]
    694                 try:
    695                     value = float(value_repr)
    696                 except ValueError:
    697                     value = None
    698 
    699                 if value is not None:
    700                     # Numeric token
    701                     i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy)
    702 
    703                 # Check weekday
    704                 elif info.weekday(l[i]) is not None:
    705                     value = info.weekday(l[i])
    706                     res.weekday = value
    707 
    708                 # Check month name
    709                 elif info.month(l[i]) is not None:
    710                     value = info.month(l[i])
    711                     ymd.append(value, 'M')
    712 
    713                     if i + 1 < len_l:
    714                         if l[i + 1] in ('-', '/'):
    715                             # Jan-01[-99]
    716                             sep = l[i + 1]
    717                             ymd.append(l[i + 2])
    718 
    719                             if i + 3 < len_l and l[i + 3] == sep:
    720                                 # Jan-01-99
    721                                 ymd.append(l[i + 4])
    722                                 i += 2
    723 
    724                             i += 2
    725 
    726                         elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and
    727                               info.pertain(l[i + 2])):
    728                             # Jan of 01
    729                             # In this case, 01 is clearly year
    730                             if l[i + 4].isdigit():
    731                                 # Convert it here to become unambiguous
    732                                 value = int(l[i + 4])
    733                                 year = str(info.convertyear(value))
    734                                 ymd.append(year, 'Y')
    735                             else:
    736                                 # Wrong guess
    737                                 pass
    738                                 # TODO: not hit in tests
    739                             i += 4
    740 
    741                 # Check am/pm
    742                 elif info.ampm(l[i]) is not None:
    743                     value = info.ampm(l[i])
    744                     val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy)
    745 
    746                     if val_is_ampm:
    747                         res.hour = self._adjust_ampm(res.hour, value)
    748                         res.ampm = value
    749 
    750                     elif fuzzy:
    751                         skipped_idxs.append(i)
    752 
    753                 # Check for a timezone name
    754                 elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]):
    755                     res.tzname = l[i]
    756                     res.tzoffset = info.tzoffset(res.tzname)
    757 
    758                     # Check for something like GMT+3, or BRST+3. Notice
    759                     # that it doesn't mean "I am 3 hours after GMT", but
    760                     # "my time +3 is GMT". If found, we reverse the
    761                     # logic so that timezone parsing code will get it
    762                     # right.
    763                     if i + 1 < len_l and l[i + 1] in ('+', '-'):
    764                         l[i + 1] = ('+', '-')[l[i + 1] == '+']
    765                         res.tzoffset = None
    766                         if info.utczone(res.tzname):
    767                             # With something like GMT+3, the timezone
    768                             # is *not* GMT.
    769                             res.tzname = None
    770 
    771                 # Check for a numbered timezone
    772                 elif res.hour is not None and l[i] in ('+', '-'):
    773                     signal = (-1, 1)[l[i] == '+']
    774                     len_li = len(l[i + 1])
    775 
    776                     # TODO: check that l[i + 1] is integer?
    777                     if len_li == 4:
    778                         # -0300
    779                         hour_offset = int(l[i + 1][:2])
    780                         min_offset = int(l[i + 1][2:])
    781                     elif i + 2 < len_l and l[i + 2] == ':':
    782                         # -03:00
    783                         hour_offset = int(l[i + 1])
    784                         min_offset = int(l[i + 3])  # TODO: Check that l[i+3] is minute-like?
    785                         i += 2
    786                     elif len_li <= 2:
    787                         # -[0]3
    788                         hour_offset = int(l[i + 1][:2])
    789                         min_offset = 0
    790                     else:
    791                         raise ValueError(timestr)
    792 
    793                     res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60)
    794 
    795                     # Look for a timezone name between parenthesis
    796                     if (i + 5 < len_l and
    797                             info.jump(l[i + 2]) and l[i + 3] == '(' and
    798                             l[i + 5] == ')' and
    799                             3 <= len(l[i + 4]) and
    800                             self._could_be_tzname(res.hour, res.tzname,
    801                                                   None, l[i + 4])):
    802                         # -0300 (BRST)
    803                         res.tzname = l[i + 4]
    804                         i += 4
    805 
    806                     i += 1
    807 
    808                 # Check jumps
    809                 elif not (info.jump(l[i]) or fuzzy):
    810                     raise ValueError(timestr)
    811 
    812                 else:
    813                     skipped_idxs.append(i)
    814                 i += 1
    815 
    816             # Process year/month/day
    817             year, month, day = ymd.resolve_ymd(yearfirst, dayfirst)
    818 
    819             res.century_specified = ymd.century_specified
    820             res.year = year
    821             res.month = month
    822             res.day = day
    823 
    824         except (IndexError, ValueError):
    825             return None, None
    826 
    827         if not info.validate(res):
    828             return None, None
    829 
    830         if fuzzy_with_tokens:
    831             skipped_tokens = self._recombine_skipped(l, skipped_idxs)
    832             return res, tuple(skipped_tokens)
    833         else:
    834             return res, None
    835 
    def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy):
        """
        Interpret the numeric token at ``tokens[idx]`` and record it.

        Depending on the token's length and on the tokens around it, the
        number is stored either as a date component (appended to ``ymd``) or
        as a time component (assigned on ``res``).  Several branches look
        ahead and consume the following tokens as well.

        :param tokens:
            The full list of string tokens being parsed.

        :param idx:
            Index of the numeric token within ``tokens``.

        :param info:
            Object providing the ``hms``, ``jump``, ``month`` and ``ampm``
            lookups used to classify neighbouring tokens.

        :param ymd:
            Year/month/day accumulator; it is appended to, measured with
            ``len()`` and queried through ``could_be_day``.

        :param res:
            Result object whose ``hour``, ``minute``, ``second`` and
            ``microsecond`` attributes are filled in.

        :param fuzzy:
            If true, a number that fits no known pattern is silently ignored
            instead of raising.

        :return:
            The index of the last token consumed.  The caller is expected to
            advance past it.

        :raises ValueError:
            If the token cannot be converted to a decimal, if a date
            separator is followed by a non-digit token that is not a month
            name, or if the number matches no pattern and ``fuzzy`` is false.
            Lookahead accesses such as ``tokens[idx + 4]`` are not all
            bounds-checked, so an ``IndexError`` may also escape.
        """
        # Token is a number
        value_repr = tokens[idx]
        try:
            value = self._to_decimal(value_repr)
        except Exception as e:
            six.raise_from(ValueError('Unknown numeric token'), e)

        # Length of the token's text (a decimal point counts as a character).
        len_li = len(value_repr)

        len_l = len(tokens)

        # The date is already complete (three ymd members) and no hour has
        # been seen, so a 2- or 4-digit number that is not followed by ':' or
        # an h/m/s label is read as HH or HHMM.
        if (len(ymd) == 3 and len_li in (2, 4) and
            res.hour is None and
            (idx + 1 >= len_l or
             (tokens[idx + 1] != ':' and
              info.hms(tokens[idx + 1]) is None))):
            # 19990101T23[59]
            s = tokens[idx]
            res.hour = int(s[:2])

            if len_li == 4:
                res.minute = int(s[2:])

        # Exactly six digits, optionally followed by a fractional part.
        elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6):
            # YYMMDD or HHMMSS[.ss]
            s = tokens[idx]

            # Treated as a date only when no date component has been seen yet
            # and there is no fractional part; otherwise it is a time.
            if not ymd and '.' not in tokens[idx]:
                ymd.append(s[:2])
                ymd.append(s[2:4])
                ymd.append(s[4:])
            else:
                # 19990101T235959[.59]

                # TODO: Check if res attributes already set.
                res.hour = int(s[:2])
                res.minute = int(s[2:4])
                res.second, res.microsecond = self._parsems(s[4:])

        # 8, 12 or 14 characters: YYYYMMDD, optionally followed by HHMM and
        # then SS.
        elif len_li in (8, 12, 14):
            # YYYYMMDD
            s = tokens[idx]
            ymd.append(s[:4], 'Y')
            ymd.append(s[4:6])
            ymd.append(s[6:8])

            if len_li > 8:
                res.hour = int(s[8:10])
                res.minute = int(s[10:12])

                if len_li > 12:
                    res.second = int(s[12:])

        # An h/m/s label is attached to this number (before or after it).
        elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None:
            # HH[ ]h or MM[ ]m or SS[.ss][ ]s
            hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True)
            # _parse_hms also moves idx forward past a label that follows.
            (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx)
            if hms is not None:
                # TODO: checking that hour/minute/second are not
                # already set?
                self._assign_hms(res, value_repr, hms)

        elif idx + 2 < len_l and tokens[idx + 1] == ':':
            # HH:MM[:SS[.ss]]
            res.hour = int(value)
            value = self._to_decimal(tokens[idx + 2])  # TODO: try/except for this?
            (res.minute, res.second) = self._parse_min_sec(value)

            if idx + 4 < len_l and tokens[idx + 3] == ':':
                res.second, res.microsecond = self._parsems(tokens[idx + 4])

                # Skip the second ':' and the seconds token.
                idx += 2

            # Skip the first ':' and the minutes token.
            idx += 2

        # Number followed by a date separator: first member of a date.
        elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'):
            sep = tokens[idx + 1]
            ymd.append(value_repr)

            if idx + 2 < len_l and not info.jump(tokens[idx + 2]):
                if tokens[idx + 2].isdigit():
                    # 01-01[-01]
                    ymd.append(tokens[idx + 2])
                else:
                    # 01-Jan[-01]
                    value = info.month(tokens[idx + 2])

                    if value is not None:
                        ymd.append(value, 'M')
                    else:
                        raise ValueError()

                # A third member is only accepted after the same separator.
                if idx + 3 < len_l and tokens[idx + 3] == sep:
                    # We have three members
                    value = info.month(tokens[idx + 4])

                    if value is not None:
                        ymd.append(value, 'M')
                    else:
                        ymd.append(tokens[idx + 4])
                    idx += 2

                idx += 1
            idx += 1

        # Number is the last token or is followed by a "jump" token.
        elif idx + 1 >= len_l or info.jump(tokens[idx + 1]):
            if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None:
                # 12 am
                hour = int(value)
                res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2]))
                idx += 1
            else:
                # Year, month or day
                ymd.append(value)
            # Consume the jump token as well (when at the end of the list
            # this simply moves idx past the last token).
            idx += 1

        elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24):
            # 12am
            hour = int(value)
            res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1]))
            idx += 1

        elif ymd.could_be_day(value):
            ymd.append(value)

        # Nothing matched: an error unless fuzzy parsing tolerates it.
        elif not fuzzy:
            raise ValueError()

        return idx
    966 
    967     def _find_hms_idx(self, idx, tokens, info, allow_jump):
    968         len_l = len(tokens)
    969 
    970         if idx+1 < len_l and info.hms(tokens[idx+1]) is not None:
    971             # There is an "h", "m", or "s" label following this token.  We take
    972             # assign the upcoming label to the current token.
    973             # e.g. the "12" in 12h"
    974             hms_idx = idx + 1
    975 
    976         elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and
    977               info.hms(tokens[idx+2]) is not None):
    978             # There is a space and then an "h", "m", or "s" label.
    979             # e.g. the "12" in "12 h"
    980             hms_idx = idx + 2
    981 
    982         elif idx > 0 and info.hms(tokens[idx-1]) is not None:
    983             # There is a "h", "m", or "s" preceeding this token.  Since neither
    984             # of the previous cases was hit, there is no label following this
    985             # token, so we use the previous label.
    986             # e.g. the "04" in "12h04"
    987             hms_idx = idx-1
    988 
    989         elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and
    990               info.hms(tokens[idx-2]) is not None):
    991             # If we are looking at the final token, we allow for a
    992             # backward-looking check to skip over a space.
    993             # TODO: Are we sure this is the right condition here?
    994             hms_idx = idx - 2
    995 
    996         else:
    997             hms_idx = None
    998 
    999         return hms_idx
   1000 
   1001     def _assign_hms(self, res, value_repr, hms):
   1002         # See GH issue #427, fixing float rounding
   1003         value = self._to_decimal(value_repr)
   1004 
   1005         if hms == 0:
   1006             # Hour
   1007             res.hour = int(value)
   1008             if value % 1:
   1009                 res.minute = int(60*(value % 1))
   1010 
   1011         elif hms == 1:
   1012             (res.minute, res.second) = self._parse_min_sec(value)
   1013 
   1014         elif hms == 2:
   1015             (res.second, res.microsecond) = self._parsems(value_repr)
   1016 
   1017     def _could_be_tzname(self, hour, tzname, tzoffset, token):
   1018         return (hour is not None and
   1019                 tzname is None and
   1020                 tzoffset is None and
   1021                 len(token) <= 5 and
   1022                 all(x in string.ascii_uppercase for x in token))
   1023 
   1024     def _ampm_valid(self, hour, ampm, fuzzy):
   1025         """
   1026         For fuzzy parsing, 'a' or 'am' (both valid English words)
   1027         may erroneously trigger the AM/PM flag. Deal with that
   1028         here.
   1029         """
   1030         val_is_ampm = True
   1031 
   1032         # If there's already an AM/PM flag, this one isn't one.
   1033         if fuzzy and ampm is not None:
   1034             val_is_ampm = False
   1035 
   1036         # If AM/PM is found and hour is not, raise a ValueError
   1037         if hour is None:
   1038             if fuzzy:
   1039                 val_is_ampm = False
   1040             else:
   1041                 raise ValueError('No hour specified with AM or PM flag.')
   1042         elif not 0 <= hour <= 12:
   1043             # If AM/PM is found, it's a 12 hour clock, so raise
   1044             # an error for invalid range
   1045             if fuzzy:
   1046                 val_is_ampm = False
   1047             else:
   1048                 raise ValueError('Invalid hour specified for 12-hour clock.')
   1049 
   1050         return val_is_ampm
   1051 
   1052     def _adjust_ampm(self, hour, ampm):
   1053         if hour < 12 and ampm == 1:
   1054             hour += 12
   1055         elif hour == 12 and ampm == 0:
   1056             hour = 0
   1057         return hour
   1058 
   1059     def _parse_min_sec(self, value):
   1060         # TODO: Every usage of this function sets res.second to the return
   1061         # value. Are there any cases where second will be returned as None and
   1062         # we *dont* want to set res.second = None?
   1063         minute = int(value)
   1064         second = None
   1065 
   1066         sec_remainder = value % 1
   1067         if sec_remainder:
   1068             second = int(60 * sec_remainder)
   1069         return (minute, second)
   1070 
   1071     def _parsems(self, value):
   1072         """Parse a I[.F] seconds value into (seconds, microseconds)."""
   1073         if "." not in value:
   1074             return int(value), 0
   1075         else:
   1076             i, f = value.split(".")
   1077             return int(i), int(f.ljust(6, "0")[:6])
   1078 
   1079     def _parse_hms(self, idx, tokens, info, hms_idx):
   1080         # TODO: Is this going to admit a lot of false-positives for when we
   1081         # just happen to have digits and "h", "m" or "s" characters in non-date
   1082         # text?  I guess hex hashes won't have that problem, but there's plenty
   1083         # of random junk out there.
   1084         if hms_idx is None:
   1085             hms = None
   1086             new_idx = idx
   1087         elif hms_idx > idx:
   1088             hms = info.hms(tokens[hms_idx])
   1089             new_idx = hms_idx
   1090         else:
   1091             # Looking backwards, increment one.
   1092             hms = info.hms(tokens[hms_idx]) + 1
   1093             new_idx = idx
   1094 
   1095         return (new_idx, hms)
   1096 
   1097     def _recombine_skipped(self, tokens, skipped_idxs):
   1098         """
   1099         >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
   1100         >>> skipped_idxs = [0, 1, 2, 5]
   1101         >>> _recombine_skipped(tokens, skipped_idxs)
   1102         ["foo bar", "baz"]
   1103         """
   1104         skipped_tokens = []
   1105         for i, idx in enumerate(sorted(skipped_idxs)):
   1106             if i > 0 and idx - 1 == skipped_idxs[i - 1]:
   1107                 skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
   1108             else:
   1109                 skipped_tokens.append(tokens[idx])
   1110 
   1111         return skipped_tokens
   1112 
   1113     def _build_tzinfo(self, tzinfos, tzname, tzoffset):
   1114         if callable(tzinfos):
   1115             tzdata = tzinfos(tzname, tzoffset)
   1116         else:
   1117             tzdata = tzinfos.get(tzname)
   1118 
   1119         if isinstance(tzdata, datetime.tzinfo):
   1120             tzinfo = tzdata
   1121         elif isinstance(tzdata, text_type):
   1122             tzinfo = tz.tzstr(tzdata)
   1123         elif isinstance(tzdata, integer_types):
   1124             tzinfo = tz.tzoffset(tzname, tzdata)
   1125         else:
   1126             raise ValueError("Offset must be tzinfo subclass, "
   1127                              "tz string, or int offset.")
   1128         return tzinfo
   1129 
   1130     def _build_tzaware(self, naive, res, tzinfos):
   1131         if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)):
   1132             tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset)
   1133             aware = naive.replace(tzinfo=tzinfo)
   1134             aware = self._assign_tzname(aware, res.tzname)
   1135 
   1136         elif res.tzname and res.tzname in time.tzname:
   1137             aware = naive.replace(tzinfo=tz.tzlocal())
   1138 
   1139             # Handle ambiguous local datetime
   1140             aware = self._assign_tzname(aware, res.tzname)
   1141 
   1142             # This is mostly relevant for winter GMT zones parsed in the UK
   1143             if (aware.tzname() != res.tzname and
   1144                     res.tzname in self.info.UTCZONE):
   1145                 aware = aware.replace(tzinfo=tz.tzutc())
   1146 
   1147         elif res.tzoffset == 0:
   1148             aware = naive.replace(tzinfo=tz.tzutc())
   1149 
   1150         elif res.tzoffset:
   1151             aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
   1152 
   1153         elif not res.tzname and not res.tzoffset:
   1154             # i.e. no timezone information was found.
   1155             aware = naive
   1156 
   1157         elif res.tzname:
   1158             # tz-like string was parsed but we don't know what to do
   1159             # with it
   1160             warnings.warn("tzname {tzname} identified but not understood.  "
   1161                           "Pass `tzinfos` argument in order to correctly "
   1162                           "return a timezone-aware datetime.  In a future "
   1163                           "version, this raise an "
   1164                           "exception.".format(tzname=res.tzname),
   1165                           category=UnknownTimezoneWarning)
   1166             aware = naive
   1167 
   1168         return aware
   1169 
   1170     def _build_naive(self, res, default):
   1171         repl = {}
   1172         for attr in ("year", "month", "day", "hour",
   1173                      "minute", "second", "microsecond"):
   1174             value = getattr(res, attr)
   1175             if value is not None:
   1176                 repl[attr] = value
   1177 
   1178         if 'day' not in repl:
   1179             # If the default day exceeds the last day of the month, fall back
   1180             # to the end of the month.
   1181             cyear = default.year if res.year is None else res.year
   1182             cmonth = default.month if res.month is None else res.month
   1183             cday = default.day if res.day is None else res.day
   1184 
   1185             if cday > monthrange(cyear, cmonth)[1]:
   1186                 repl['day'] = monthrange(cyear, cmonth)[1]
   1187 
   1188         naive = default.replace(**repl)
   1189 
   1190         if res.weekday is not None and not res.day:
   1191             naive = naive + relativedelta.relativedelta(weekday=res.weekday)
   1192 
   1193         return naive
   1194 
   1195     def _assign_tzname(self, dt, tzname):
   1196         if dt.tzname() != tzname:
   1197             new_dt = tz.enfold(dt, fold=1)
   1198             if new_dt.tzname() == tzname:
   1199                 return new_dt
   1200 
   1201         return dt
   1202 
   1203     def _to_decimal(self, val):
   1204         try:
   1205             return Decimal(val)
   1206         except Exception as e:
   1207             msg = "Could not convert %s to decimal" % val
   1208             six.raise_from(ValueError(msg), e)
   1209 
   1210 
# Module-level parser built with default arguments; parse() uses it whenever
# no ``parserinfo`` is supplied.
DEFAULTPARSER = parser()
   1212 
   1213 
   1214 def parse(timestr, parserinfo=None, **kwargs):
   1215     """
   1216 
   1217     Parse a string in one of the supported formats, using the
   1218     ``parserinfo`` parameters.
   1219 
   1220     :param timestr:
   1221         A string containing a date/time stamp.
   1222 
   1223     :param parserinfo:
   1224         A :class:`parserinfo` object containing parameters for the parser.
   1225         If ``None``, the default arguments to the :class:`parserinfo`
   1226         constructor are used.
   1227 
   1228     The ``**kwargs`` parameter takes the following keyword arguments:
   1229 
   1230     :param default:
   1231         The default datetime object, if this is a datetime object and not
   1232         ``None``, elements specified in ``timestr`` replace elements in the
   1233         default object.
   1234 
   1235     :param ignoretz:
   1236         If set ``True``, time zones in parsed strings are ignored and a naive
   1237         :class:`datetime` object is returned.
   1238 
   1239     :param tzinfos:
   1240         Additional time zone names / aliases which may be present in the
   1241         string. This argument maps time zone names (and optionally offsets
   1242         from those time zones) to time zones. This parameter can be a
   1243         dictionary with timezone aliases mapping time zone names to time
   1244         zones or a function taking two parameters (``tzname`` and
   1245         ``tzoffset``) and returning a time zone.
   1246 
   1247         The timezones to which the names are mapped can be an integer
   1248         offset from UTC in seconds or a :class:`tzinfo` object.
   1249 
   1250         .. doctest::
   1251            :options: +NORMALIZE_WHITESPACE
   1252 
   1253             >>> from dateutil.parser import parse
   1254             >>> from dateutil.tz import gettz
   1255             >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
   1256             >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
   1257             datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
   1258             >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
   1259             datetime.datetime(2012, 1, 19, 17, 21,
   1260                               tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))
   1261 
   1262         This parameter is ignored if ``ignoretz`` is set.
   1263 
   1264     :param dayfirst:
   1265         Whether to interpret the first value in an ambiguous 3-integer date
   1266         (e.g. 01/05/09) as the day (``True``) or month (``False``). If
   1267         ``yearfirst`` is set to ``True``, this distinguishes between YDM and
   1268         YMD. If set to ``None``, this value is retrieved from the current
   1269         :class:`parserinfo` object (which itself defaults to ``False``).
   1270 
   1271     :param yearfirst:
   1272         Whether to interpret the first value in an ambiguous 3-integer date
   1273         (e.g. 01/05/09) as the year. If ``True``, the first number is taken to
   1274         be the year, otherwise the last number is taken to be the year. If
   1275         this is set to ``None``, the value is retrieved from the current
   1276         :class:`parserinfo` object (which itself defaults to ``False``).
   1277 
   1278     :param fuzzy:
   1279         Whether to allow fuzzy parsing, allowing for string like "Today is
   1280         January 1, 2047 at 8:21:00AM".
   1281 
   1282     :param fuzzy_with_tokens:
   1283         If ``True``, ``fuzzy`` is automatically set to True, and the parser
   1284         will return a tuple where the first element is the parsed
   1285         :class:`datetime.datetime` datetimestamp and the second element is
   1286         a tuple containing the portions of the string which were ignored:
   1287 
   1288         .. doctest::
   1289 
   1290             >>> from dateutil.parser import parse
   1291             >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
   1292             (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))
   1293 
   1294     :return:
   1295         Returns a :class:`datetime.datetime` object or, if the
   1296         ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
   1297         first element being a :class:`datetime.datetime` object, the second
   1298         a tuple containing the fuzzy tokens.
   1299 
   1300     :raises ValueError:
   1301         Raised for invalid or unknown string format, if the provided
   1302         :class:`tzinfo` is not in a valid format, or if an invalid date
   1303         would be created.
   1304 
   1305     :raises OverflowError:
   1306         Raised if the parsed date exceeds the largest valid C integer on
   1307         your system.
   1308     """
   1309     if parserinfo:
   1310         return parser(parserinfo).parse(timestr, **kwargs)
   1311     else:
   1312         return DEFAULTPARSER.parse(timestr, **kwargs)
   1313 
   1314 
   1315 class _tzparser(object):
   1316 
   1317     class _result(_resultbase):
   1318 
   1319         __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
   1320                      "start", "end"]
   1321 
   1322         class _attr(_resultbase):
   1323             __slots__ = ["month", "week", "weekday",
   1324                          "yday", "jyday", "day", "time"]
   1325 
   1326         def __repr__(self):
   1327             return self._repr("")
   1328 
   1329         def __init__(self):
   1330             _resultbase.__init__(self)
   1331             self.start = self._attr()
   1332             self.end = self._attr()
   1333 
   1334     def parse(self, tzstr):
   1335         res = self._result()
   1336         l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)',tzstr) if x]
   1337         used_idxs = list()
   1338         try:
   1339 
   1340             len_l = len(l)
   1341 
   1342             i = 0
   1343             while i < len_l:
   1344                 # BRST+3[BRDT[+2]]
   1345                 j = i
   1346                 while j < len_l and not [x for x in l[j]
   1347                                          if x in "0123456789:,-+"]:
   1348                     j += 1
   1349                 if j != i:
   1350                     if not res.stdabbr:
   1351                         offattr = "stdoffset"
   1352                         res.stdabbr = "".join(l[i:j])
   1353                     else:
   1354                         offattr = "dstoffset"
   1355                         res.dstabbr = "".join(l[i:j])
   1356 
   1357                     for ii in range(j):
   1358                         used_idxs.append(ii)
   1359                     i = j
   1360                     if (i < len_l and (l[i] in ('+', '-') or l[i][0] in
   1361                                        "0123456789")):
   1362                         if l[i] in ('+', '-'):
   1363                             # Yes, that's right.  See the TZ variable
   1364                             # documentation.
   1365                             signal = (1, -1)[l[i] == '+']
   1366                             used_idxs.append(i)
   1367                             i += 1
   1368                         else:
   1369                             signal = -1
   1370                         len_li = len(l[i])
   1371                         if len_li == 4:
   1372                             # -0300
   1373                             setattr(res, offattr, (int(l[i][:2]) * 3600 +
   1374                                                    int(l[i][2:]) * 60) * signal)
   1375                         elif i + 1 < len_l and l[i + 1] == ':':
   1376                             # -03:00
   1377                             setattr(res, offattr,
   1378                                     (int(l[i]) * 3600 +
   1379                                      int(l[i + 2]) * 60) * signal)
   1380                             used_idxs.append(i)
   1381                             i += 2
   1382                         elif len_li <= 2:
   1383                             # -[0]3
   1384                             setattr(res, offattr,
   1385                                     int(l[i][:2]) * 3600 * signal)
   1386                         else:
   1387                             return None
   1388                         used_idxs.append(i)
   1389                         i += 1
   1390                     if res.dstabbr:
   1391                         break
   1392                 else:
   1393                     break
   1394 
   1395 
   1396             if i < len_l:
   1397                 for j in range(i, len_l):
   1398                     if l[j] == ';':
   1399                         l[j] = ','
   1400 
   1401                 assert l[i] == ','
   1402 
   1403                 i += 1
   1404 
   1405             if i >= len_l:
   1406                 pass
   1407             elif (8 <= l.count(',') <= 9 and
   1408                   not [y for x in l[i:] if x != ','
   1409                        for y in x if y not in "0123456789+-"]):
   1410                 # GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
   1411                 for x in (res.start, res.end):
   1412                     x.month = int(l[i])
   1413                     used_idxs.append(i)
   1414                     i += 2
   1415                     if l[i] == '-':
   1416                         value = int(l[i + 1]) * -1
   1417                         used_idxs.append(i)
   1418                         i += 1
   1419                     else:
   1420                         value = int(l[i])
   1421                     used_idxs.append(i)
   1422                     i += 2
   1423                     if value:
   1424                         x.week = value
   1425                         x.weekday = (int(l[i]) - 1) % 7
   1426                     else:
   1427                         x.day = int(l[i])
   1428                     used_idxs.append(i)
   1429                     i += 2
   1430                     x.time = int(l[i])
   1431                     used_idxs.append(i)
   1432                     i += 2
   1433                 if i < len_l:
   1434                     if l[i] in ('-', '+'):
   1435                         signal = (-1, 1)[l[i] == "+"]
   1436                         used_idxs.append(i)
   1437                         i += 1
   1438                     else:
   1439                         signal = 1
   1440                     used_idxs.append(i)
   1441                     res.dstoffset = (res.stdoffset + int(l[i]) * signal)
   1442 
   1443                 # This was a made-up format that is not in normal use
   1444                 warn(('Parsed time zone "%s"' % tzstr) +
   1445                      'is in a non-standard dateutil-specific format, which ' +
   1446                      'is now deprecated; support for parsing this format ' +
   1447                      'will be removed in future versions. It is recommended ' +
   1448                      'that you switch to a standard format like the GNU ' +
   1449                      'TZ variable format.', tz.DeprecatedTzFormatWarning)
   1450             elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
   1451                   not [y for x in l[i:] if x not in (',', '/', 'J', 'M',
   1452                                                      '.', '-', ':')
   1453                        for y in x if y not in "0123456789"]):
   1454                 for x in (res.start, res.end):
   1455                     if l[i] == 'J':
   1456                         # non-leap year day (1 based)
   1457                         used_idxs.append(i)
   1458                         i += 1
   1459                         x.jyday = int(l[i])
   1460                     elif l[i] == 'M':
   1461                         # month[-.]week[-.]weekday
   1462                         used_idxs.append(i)
   1463                         i += 1
   1464                         x.month = int(l[i])
   1465                         used_idxs.append(i)
   1466                         i += 1
   1467                         assert l[i] in ('-', '.')
   1468                         used_idxs.append(i)
   1469                         i += 1
   1470                         x.week = int(l[i])
   1471                         if x.week == 5:
   1472                             x.week = -1
   1473                         used_idxs.append(i)
   1474                         i += 1
   1475                         assert l[i] in ('-', '.')
   1476                         used_idxs.append(i)
   1477                         i += 1
   1478                         x.weekday = (int(l[i]) - 1) % 7
   1479                     else:
   1480                         # year day (zero based)
   1481                         x.yday = int(l[i]) + 1
   1482 
   1483                     used_idxs.append(i)
   1484                     i += 1
   1485 
   1486                     if i < len_l and l[i] == '/':
   1487                         used_idxs.append(i)
   1488                         i += 1
   1489                         # start time
   1490                         len_li = len(l[i])
   1491                         if len_li == 4:
   1492                             # -0300
   1493                             x.time = (int(l[i][:2]) * 3600 +
   1494                                       int(l[i][2:]) * 60)
   1495                         elif i + 1 < len_l and l[i + 1] == ':':
   1496                             # -03:00
   1497                             x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60
   1498                             used_idxs.append(i)
   1499                             i += 2
   1500                             if i + 1 < len_l and l[i + 1] == ':':
   1501                                 used_idxs.append(i)
   1502                                 i += 2
   1503                                 x.time += int(l[i])
   1504                         elif len_li <= 2:
   1505                             # -[0]3
   1506                             x.time = (int(l[i][:2]) * 3600)
   1507                         else:
   1508                             return None
   1509                         used_idxs.append(i)
   1510                         i += 1
   1511 
   1512                     assert i == len_l or l[i] == ','
   1513 
   1514                     i += 1
   1515 
   1516                 assert i >= len_l
   1517 
   1518         except (IndexError, ValueError, AssertionError):
   1519             return None
   1520 
   1521         unused_idxs = set(range(len_l)).difference(used_idxs)
   1522         res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",",":"})
   1523         return res
   1524 
   1525 
# Shared module-level ``_tzparser`` instance, created once at import time;
# ``_parsetz`` below delegates to its ``parse`` method.
DEFAULTTZPARSER = _tzparser()
   1527 
   1528 
   1529 def _parsetz(tzstr):
   1530     return DEFAULTTZPARSER.parse(tzstr)
   1531 
   1532 class UnknownTimezoneWarning(RuntimeWarning):
   1533     """Raised when the parser finds a timezone it cannot parse into a tzinfo"""
   1534 # vim:ts=4:sw=4:et
   1535