# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Helpers for extracting the trace data blocks embedded in HTML traces."""

import base64
import codecs
import gzip
import io
import json
import re


# First two bytes of every gzip stream; used to sniff whether a payload is
# already compressed.
GZIP_HEADER_BYTES = b'\x1f\x8b'


# Regular expressions for matching the beginning and end of trace data in HTML
# traces. See tracing/extras/importer/trace2html_importer.html.
TRACE_DATA_START_LINE_RE = re.compile(
    r'^<\s*script id="viewer-data" type="(application\/json|text\/plain)">$')
TRACE_DATA_END_LINE_RE = re.compile(r'^<\/\s*script>$')


def CopyTraceDataFromHTMLFilePath(html_path, trace_path, gzipped_output=False):
  """Copies trace data from an existing HTML file into new trace file(s).

  If |html_path| doesn't contain any trace data blocks, this function throws an
  exception. If |html_path| contains more than one trace data block, the first
  block will be extracted into |trace_path| and the rest will be extracted
  into separate files |trace_path|.1, |trace_path|.2, etc.

  The contents of each trace data block is decoded and, if |gzipped_output| is
  false, inflated before it's stored in a trace file. If |gzipped_output| is
  true, blocks that are not already gzipped are compressed first.

  This function returns a list of paths of the saved trace files ([|trace_path|,
  |trace_path|.1, |trace_path|.2, ...]).
  """
  trace_data_list = _ExtractTraceDataFromHTMLFile(
      html_path, unzip_data=not gzipped_output)
  saved_paths = []
  for i, trace_data in enumerate(trace_data_list):
    saved_path = trace_path if i == 0 else '%s.%d' % (trace_path, i)
    saved_paths.append(saved_path)
    # The extracted payload is always a byte string (inflated or gzipped), so
    # the file is opened in binary mode in both cases. Text mode would reject
    # bytes on Python 3 and rewrite newlines on Windows.
    with open(saved_path, 'wb') as trace_file:
      trace_file.write(trace_data.read())
  return saved_paths


def ReadTracesFromHTMLFilePath(html_path):
  """Returns a list of inflated JSON traces extracted from an HTML file.

  Each trace data block in |html_path| is decoded, inflated if it is gzipped,
  and parsed with json.load. Throws an exception if |html_path| contains no
  trace data block.
  """
  # A list comprehension (rather than map) guarantees a real list on both
  # Python 2 and Python 3, as the summary above promises.
  return [json.load(trace_file)
          for trace_file in _ExtractTraceDataFromHTMLFile(html_path)]


def _ExtractTraceDataFromHTMLFile(html_path, unzip_data=True):
  """Returns one in-memory binary file per trace data block in |html_path|.

  A block is the run of lines between a line matching
  TRACE_DATA_START_LINE_RE and the next line matching TRACE_DATA_END_LINE_RE.
  The block contents are base64-decoded. If |unzip_data| is true every
  returned file yields inflated data; otherwise every returned file yields
  gzipped data.

  Raises an Exception if the file has no trace data block, or if a block is
  never closed by an end line.
  """
  with codecs.open(html_path, mode='r', encoding='utf-8') as html_file:
    lines = html_file.readlines()

  start_indices = [i for i, line in enumerate(lines)
                   if TRACE_DATA_START_LINE_RE.match(line)]
  if not start_indices:
    raise Exception('File %r does not contain trace data' % html_path)

  decoded_data_list = []
  for start_index in start_indices:
    # Supplying a default keeps a missing end tag from surfacing as a bare
    # StopIteration, which is easy to misread and is silently swallowed if it
    # ever propagates through a generator.
    end_index = next(
        (i for i in range(start_index + 1, len(lines))
         if TRACE_DATA_END_LINE_RE.match(lines[i])),
        None)
    if end_index is None:
      raise Exception(
          'File %r has an unterminated trace data block starting at line %d'
          % (html_path, start_index + 1))
    encoded_data = '\n'.join(lines[start_index + 1:end_index]).strip()
    decoded_data_list.append(io.BytesIO(base64.b64decode(encoded_data)))

  if unzip_data:
    return [_UnzipFileIfNecessary(f) for f in decoded_data_list]
  return [_ZipFileIfNecessary(f) for f in decoded_data_list]


def _UnzipFileIfNecessary(original_file):
  """Returns a file object that yields the inflated contents of the input.

  If |original_file| starts with the gzip header it is wrapped in a
  gzip.GzipFile reader; otherwise it is returned unchanged.
  """
  if _IsFileZipped(original_file):
    return gzip.GzipFile(fileobj=original_file)
  return original_file


def _ZipFileIfNecessary(original_file):
  """Returns a file object that yields gzipped contents of the input.

  If |original_file| already starts with the gzip header it is returned
  unchanged; otherwise its contents are compressed into a new in-memory file
  that is rewound to the beginning before being returned.
  """
  if _IsFileZipped(original_file):
    return original_file
  zipped_file = io.BytesIO()
  with gzip.GzipFile(fileobj=zipped_file, mode='wb') as gzip_wrapper:
    gzip_wrapper.write(original_file.read())
  zipped_file.seek(0)
  return zipped_file


def _IsFileZipped(f):
  """Returns whether |f| begins with the gzip header bytes.

  Reads from the current position of |f| and then seeks |f| back to offset 0.
  """
  is_gzipped = f.read(len(GZIP_HEADER_BYTES)) == GZIP_HEADER_BYTES
  f.seek(0)
  return is_gzipped