Home | History | Annotate | Download | only in json_comment_eater
      1 #!/usr/bin/env python
      2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 '''Utility to remove comments from JSON files so that they can be parsed by
      7 json.loads.
      8 '''
      9 
     10 import sys
     11 
     12 
     13 def _Rcount(string, chars):
     14   '''Returns the number of consecutive characters from |chars| that occur at the
     15   end of |string|.
     16   '''
     17   return len(string) - len(string.rstrip(chars))
     18 
     19 
     20 def _FindNextToken(string, tokens, start):
     21   '''Finds the next token in |tokens| that occurs in |string| from |start|.
     22   Returns a tuple (index, token key).
     23   '''
     24   min_index, min_key = (-1, None)
     25   for k in tokens:
     26     index = string.find(k, start)
     27     if index != -1 and (min_index == -1 or index < min_index):
     28       min_index, min_key = (index, k)
     29   return (min_index, min_key)
     30 
     31 
     32 def _ReadString(input, start, output):
     33   output.append('"')
     34   start_range, end_range = (start, input.find('"', start))
     35   # \" escapes the ", \\" doesn't, \\\" does, etc.
     36   while (end_range != -1 and
     37          _Rcount(input[start_range:end_range], '\\') % 2 == 1):
     38     start_range, end_range = (end_range, input.find('"', end_range + 1))
     39   if end_range == -1:
     40     return start_range + 1
     41   output.append(input[start:end_range + 1])
     42   return end_range + 1
     43 
     44 
     45 def _ReadComment(input, start, output):
     46   eol_tokens = ('\n', '\r')
     47   eol_token_index, eol_token = _FindNextToken(input, eol_tokens, start)
     48   if eol_token is None:
     49     return len(input)
     50   output.append(eol_token)
     51   return eol_token_index + len(eol_token)
     52 
     53 
     54 def Nom(input):
     55   token_actions = {
     56     '"': _ReadString,
     57     '//': _ReadComment,
     58   }
     59   output = []
     60   pos = 0
     61   while pos < len(input):
     62     token_index, token = _FindNextToken(input, token_actions.keys(), pos)
     63     if token is None:
     64       output.append(input[pos:])
     65       break
     66     output.append(input[pos:token_index])
     67     pos = token_actions[token](input, token_index + len(token), output)
     68   return ''.join(output)
     69 
     70 
     71 if __name__ == '__main__':
     72     sys.stdout.write(Nom(sys.stdin.read()))
     73