#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Parses the contents of a Unix archive file generated using the 'ar' command.

The constructor returns an Archive object, which contains a dictionary from
file name to file content.


Typical usage example:

    archive = Archive(content)
    archive.Parse()
"""

import io


class Archive(object):
    """Archive object parses and stores Unix archive contents.

    Stores the file names and contents as it parses the archive.

    Attributes:
        files: a dictionary from file name to file content, filled in by
               Parse(). Names and contents are slices of the data given to
               the constructor, so they have the same type as that data
               (bytes in, bytes out).
    """

    GLOBAL_SIG = '!<arch>\n'  # Unix global signature
    STRING_TABLE_ID = '//'
    STRING_TABLE_TERMINATOR = '/\n'
    SYM_TABLE_ID = '__.SYMDEF'
    FILE_ID_LENGTH = 16  # Number of bytes to store file identifier
    FILE_ID_TERMINATOR = '/'
    FILE_TIMESTAMP_LENGTH = 12  # Number of bytes to store file mod timestamp
    OWNER_ID_LENGTH = 6  # Number of bytes to store file owner ID
    GROUP_ID_LENGTH = 6  # Number of bytes to store file group ID
    FILE_MODE_LENGTH = 8  # Number of bytes to store file mode
    CONTENT_SIZE_LENGTH = 10  # Number of bytes to store content size
    END_TAG = '`\n'  # Header end tag
    PADDING = '\n'  # Filler that follows a member with an odd content size

    def __init__(self, file_content):
        """Stores the archive contents; call Parse() to populate `files`.

        Args:
            file_content: Binary contents of the archive file.
        """

        self.files = {}
        self._content = file_content
        self._cursor = 0
        self._string_table = {}

    def _Token(self, marker):
        """Returns a format marker converted to the type of the content.

        The class constants are text strings. When the archive content is a
        binary (non-str) object, comparing it with a text string never
        matches on Python 3, so the marker is encoded first.

        Args:
            marker: One of the class-level format constants.

        Returns:
            The marker, ASCII-encoded if the content is not a str.
        """
        if isinstance(marker, str) and not isinstance(self._content, str):
            return marker.encode('ascii')
        return marker

    def ReadBytes(self, n):
        """Reads n bytes from the content stream.

        Args:
            n: The integer number of bytes to read.

        Returns:
            The n-byte string (binary) of data from the content stream.

        Raises:
            ValueError: invalid file format.
        """
        if self._cursor + n > len(self._content):
            raise ValueError('Invalid file. EOF reached unexpectedly.')

        content = self._content[self._cursor : self._cursor + n]
        self._cursor += n
        return content

    def Parse(self):
        """Verifies the archive header and parses the contents of the archive.

        Raises:
            ValueError: invalid file format.
        """
        # Check global header
        sig = self.ReadBytes(len(self.GLOBAL_SIG))
        if sig != self._Token(self.GLOBAL_SIG):
            raise ValueError('File is not a valid Unix archive.')

        # Read files in archive
        while self._cursor < len(self._content):
            self.ReadFile()

    def ReadFile(self):
        """Reads a file from the archive content stream.

        A string table member ('//') is recorded for later name lookups, a
        symbol table member is skipped, and every other member is stored in
        `files` under its resolved name.

        Raises:
            ValueError: invalid file format.
        """
        name = self.ReadBytes(self.FILE_ID_LENGTH).strip()
        # The timestamp, owner, group and mode fields are read only to
        # advance the cursor; their values are not used.
        self.ReadBytes(self.FILE_TIMESTAMP_LENGTH)
        self.ReadBytes(self.OWNER_ID_LENGTH)
        self.ReadBytes(self.GROUP_ID_LENGTH)
        self.ReadBytes(self.FILE_MODE_LENGTH)
        size = self.ReadBytes(self.CONTENT_SIZE_LENGTH)
        content_size = int(size)
        if content_size < 0:
            # A negative size would move the cursor backwards.
            raise ValueError(
                'File is not a valid Unix archive. Negative content size.')

        if self.ReadBytes(len(self.END_TAG)) != self._Token(self.END_TAG):
            raise ValueError(
                'File is not a valid Unix archive. Missing end tag.')

        content = self.ReadBytes(content_size)

        # Members start on an even offset: an odd-sized member is followed by
        # one filler byte that is not counted in its size. Skip it so the next
        # header is read from the right position.
        if content_size % 2:
            padding = self._Token(self.PADDING)
            next_bytes = self._content[
                self._cursor : self._cursor + len(padding)]
            if next_bytes == padding:
                self._cursor += len(padding)

        terminator = self._Token(self.FILE_ID_TERMINATOR)
        if name == self._Token(self.STRING_TABLE_ID):
            # Index every entry by its byte offset within the table; long
            # file names refer to the table as '/<offset>'.
            separator = self._Token(self.STRING_TABLE_TERMINATOR)
            acc = 0
            for string in content.split(separator):
                self._string_table[acc] = string
                acc += len(string) + len(separator)
        elif name != self._Token(self.SYM_TABLE_ID):
            if name.endswith(terminator):
                # Short name stored inline as 'name/'. A member whose
                # identifier is just the terminator gets the empty name.
                name = name[:-len(terminator)]
            elif name.startswith(terminator):
                # Long name stored in the string table as '/<offset>'.
                offset = int(name[len(terminator):])
                if offset not in self._string_table:
                    raise ValueError(
                        'Offset %s not in string table.' % offset)
                name = self._string_table[offset]
            self.files[name] = content