Home | History | Annotate | Download | only in mod_pywebsocket
      1 # Copyright 2011, Google Inc.
      2 # All rights reserved.
      3 #
      4 # Redistribution and use in source and binary forms, with or without
      5 # modification, are permitted provided that the following conditions are
      6 # met:
      7 #
      8 #     * Redistributions of source code must retain the above copyright
      9 # notice, this list of conditions and the following disclaimer.
     10 #     * Redistributions in binary form must reproduce the above
     11 # copyright notice, this list of conditions and the following disclaimer
     12 # in the documentation and/or other materials provided with the
     13 # distribution.
     14 #     * Neither the name of Google Inc. nor the names of its
     15 # contributors may be used to endorse or promote products derived from
     16 # this software without specific prior written permission.
     17 #
     18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30 
     31 """Utilities for parsing and formatting headers that follow the grammar defined
     32 in HTTP RFC http://www.ietf.org/rfc/rfc2616.txt.
     33 """
     34 
     35 
     36 import urlparse
     37 
     38 
# Characters treated as separators by this module (cf. the HTTP RFC referenced
# in the module docstring). consume_token() stops at any of them, and
# quote_if_necessary() quotes a string that contains one.
_SEPARATORS = '()<>@,;:\\"/[]?={} \t'
     40 
     41 
     42 def _is_char(c):
     43     """Returns true iff c is in CHAR as specified in HTTP RFC."""
     44 
     45     return ord(c) <= 127
     46 
     47 
     48 def _is_ctl(c):
     49     """Returns true iff c is in CTL as specified in HTTP RFC."""
     50 
     51     return ord(c) <= 31 or ord(c) == 127
     52 
     53 
     54 class ParsingState(object):
     55 
     56     def __init__(self, data):
     57         self.data = data
     58         self.head = 0
     59 
     60 
     61 def peek(state, pos=0):
     62     """Peeks the character at pos from the head of data."""
     63 
     64     if state.head + pos >= len(state.data):
     65         return None
     66 
     67     return state.data[state.head + pos]
     68 
     69 
     70 def consume(state, amount=1):
     71     """Consumes specified amount of bytes from the head and returns the
     72     consumed bytes. If there's not enough bytes to consume, returns None.
     73     """
     74 
     75     if state.head + amount > len(state.data):
     76         return None
     77 
     78     result = state.data[state.head:state.head + amount]
     79     state.head = state.head + amount
     80     return result
     81 
     82 
     83 def consume_string(state, expected):
     84     """Given a parsing state and a expected string, consumes the string from
     85     the head. Returns True if consumed successfully. Otherwise, returns
     86     False.
     87     """
     88 
     89     pos = 0
     90 
     91     for c in expected:
     92         if c != peek(state, pos):
     93             return False
     94         pos += 1
     95 
     96     consume(state, pos)
     97     return True
     98 
     99 
    100 def consume_lws(state):
    101     """Consumes a LWS from the head. Returns True if any LWS is consumed.
    102     Otherwise, returns False.
    103 
    104     LWS = [CRLF] 1*( SP | HT )
    105     """
    106 
    107     original_head = state.head
    108 
    109     consume_string(state, '\r\n')
    110 
    111     pos = 0
    112 
    113     while True:
    114         c = peek(state, pos)
    115         if c == ' ' or c == '\t':
    116             pos += 1
    117         else:
    118             if pos == 0:
    119                 state.head = original_head
    120                 return False
    121             else:
    122                 consume(state, pos)
    123                 return True
    124 
    125 
    126 def consume_lwses(state):
    127     """Consumes *LWS from the head."""
    128 
    129     while consume_lws(state):
    130         pass
    131 
    132 
    133 def consume_token(state):
    134     """Consumes a token from the head. Returns the token or None if no token
    135     was found.
    136     """
    137 
    138     pos = 0
    139 
    140     while True:
    141         c = peek(state, pos)
    142         if c is None or c in _SEPARATORS or _is_ctl(c) or not _is_char(c):
    143             if pos == 0:
    144                 return None
    145 
    146             return consume(state, pos)
    147         else:
    148             pos += 1
    149 
    150 
    151 def consume_token_or_quoted_string(state):
    152     """Consumes a token or a quoted-string, and returns the token or unquoted
    153     string. If no token or quoted-string was found, returns None.
    154     """
    155 
    156     original_head = state.head
    157 
    158     if not consume_string(state, '"'):
    159         return consume_token(state)
    160 
    161     result = []
    162 
    163     expect_quoted_pair = False
    164 
    165     while True:
    166         if not expect_quoted_pair and consume_lws(state):
    167             result.append(' ')
    168             continue
    169 
    170         c = consume(state)
    171         if c is None:
    172             # quoted-string is not enclosed with double quotation
    173             state.head = original_head
    174             return None
    175         elif expect_quoted_pair:
    176             expect_quoted_pair = False
    177             if _is_char(c):
    178                 result.append(c)
    179             else:
    180                 # Non CHAR character found in quoted-pair
    181                 state.head = original_head
    182                 return None
    183         elif c == '\\':
    184             expect_quoted_pair = True
    185         elif c == '"':
    186             return ''.join(result)
    187         elif _is_ctl(c):
    188             # Invalid character %r found in qdtext
    189             state.head = original_head
    190             return None
    191         else:
    192             result.append(c)
    193 
    194 
    195 def quote_if_necessary(s):
    196     """Quotes arbitrary string into quoted-string."""
    197 
    198     quote = False
    199     if s == '':
    200         return '""'
    201 
    202     result = []
    203     for c in s:
    204         if c == '"' or c in _SEPARATORS or _is_ctl(c) or not _is_char(c):
    205             quote = True
    206 
    207         if c == '"' or _is_ctl(c):
    208             result.append('\\' + c)
    209         else:
    210             result.append(c)
    211 
    212     if quote:
    213         return '"' + ''.join(result) + '"'
    214     else:
    215         return ''.join(result)
    216 
    217 
    218 def parse_uri(uri):
    219     """Parse absolute URI then return host, port and resource."""
    220 
    221     parsed = urlparse.urlsplit(uri)
    222     if parsed.scheme != 'wss' and parsed.scheme != 'ws':
    223         # |uri| must be a relative URI.
    224         # TODO(toyoshim): Should validate |uri|.
    225         return None, None, uri
    226 
    227     if parsed.hostname is None:
    228         return None, None, None
    229 
    230     port = None
    231     try:
    232         port = parsed.port
    233     except ValueError, e:
    234         # port property cause ValueError on invalid null port description like
    235         # 'ws://host:/path'.
    236         return None, None, None
    237 
    238     if port is None:
    239         if parsed.scheme == 'ws':
    240             port = 80
    241         else:
    242             port = 443
    243 
    244     path = parsed.path
    245     if not path:
    246         path += '/'
    247     if parsed.query:
    248         path += '?' + parsed.query
    249     if parsed.fragment:
    250         path += '#' + parsed.fragment
    251 
    252     return parsed.hostname, port, path
    253 
    254 
    255 try:
    256     urlparse.uses_netloc.index('ws')
    257 except ValueError, e:
    258     # urlparse in Python2.5.1 doesn't have 'ws' and 'wss' entries.
    259     urlparse.uses_netloc.append('ws')
    260     urlparse.uses_netloc.append('wss')
    261 
    262 
    263 # vi:sts=4 sw=4 et
    264