Home | History | Annotate | Download | only in archive
      1 #
      2 # Copyright (C) 2016 The Android Open Source Project
      3 #
      4 # Licensed under the Apache License, Version 2.0 (the "License");
      5 # you may not use this file except in compliance with the License.
      6 # You may obtain a copy of the License at
      7 #
      8 #      http://www.apache.org/licenses/LICENSE-2.0
      9 #
     10 # Unless required by applicable law or agreed to in writing, software
     11 # distributed under the License is distributed on an "AS IS" BASIS,
     12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 # See the License for the specific language governing permissions and
     14 # limitations under the License.
     15 #
     16 """Parses the contents of a Unix archive file generated using the 'ar' command.
     17 
The constructor returns an Archive object, which contains a dictionary from
file name to file content.
     20 
     21 
     22     Typical usage example:
     23 
     24     archive = Archive(content)
     25     archive.Parse()
     26 """
     27 
     28 import io
     29 
     30 class Archive(object):
     31     """Archive object parses and stores Unix archive contents.
     32 
     33     Stores the file names and contents as it parses the archive.
     34 
     35     Attributes:
     36         files: a dictionary from file name (string) to file content (binary)
     37     """
     38 
     39     GLOBAL_SIG = '!<arch>\n'  # Unix global signature
     40     STRING_TABLE_ID = '//'
     41     STRING_TABLE_TERMINATOR = '/\n'
     42     SYM_TABLE_ID = '__.SYMDEF'
     43     FILE_ID_LENGTH = 16  # Number of bytes to store file identifier
     44     FILE_ID_TERMINATOR = '/'
     45     FILE_TIMESTAMP_LENGTH = 12  # Number of bytes to store file mod timestamp
     46     OWNER_ID_LENGTH = 6  # Number of bytes to store file owner ID
     47     GROUP_ID_LENGTH = 6  # Number of bytes to store file group ID
     48     FILE_MODE_LENGTH = 8  # Number of bytes to store file mode
     49     CONTENT_SIZE_LENGTH = 10  # Number of bytes to store content size
     50     END_TAG = '`\n'  # Header end tag
     51 
     52     def __init__(self, file_content):
     53         """Initialize and parse the archive contents.
     54 
     55         Args:
     56           file_content: Binary contents of the archive file.
     57         """
     58 
     59         self.files = {}
     60         self._content = file_content
     61         self._cursor = 0
     62         self._string_table = dict()
     63 
     64     def ReadBytes(self, n):
     65         """Reads n bytes from the content stream.
     66 
     67         Args:
     68             n: The integer number of bytes to read.
     69 
     70         Returns:
     71             The n-bit string (binary) of data from the content stream.
     72 
     73         Raises:
     74             ValueError: invalid file format.
     75         """
     76         if self._cursor + n > len(self._content):
     77             raise ValueError('Invalid file. EOF reached unexpectedly.')
     78 
     79         content = self._content[self._cursor : self._cursor + n]
     80         self._cursor += n
     81         return content
     82 
     83     def Parse(self):
     84         """Verifies the archive header and arses the contents of the archive.
     85 
     86         Raises:
     87             ValueError: invalid file format.
     88         """
     89         # Check global header
     90         sig = self.ReadBytes(len(self.GLOBAL_SIG))
     91         if sig != self.GLOBAL_SIG:
     92             raise ValueError('File is not a valid Unix archive.')
     93 
     94         # Read files in archive
     95         while self._cursor < len(self._content):
     96             self.ReadFile()
     97 
     98     def ReadFile(self):
     99         """Reads a file from the archive content stream.
    100 
    101         Raises:
    102             ValueError: invalid file format.
    103         """
    104         name = self.ReadBytes(self.FILE_ID_LENGTH).strip()
    105         self.ReadBytes(self.FILE_TIMESTAMP_LENGTH)
    106         self.ReadBytes(self.OWNER_ID_LENGTH)
    107         self.ReadBytes(self.GROUP_ID_LENGTH)
    108         self.ReadBytes(self.FILE_MODE_LENGTH)
    109         size = self.ReadBytes(self.CONTENT_SIZE_LENGTH)
    110         content_size = int(size)
    111 
    112         if self.ReadBytes(len(self.END_TAG)) != self.END_TAG:
    113             raise ValueError('File is not a valid Unix archive. Missing end tag.')
    114 
    115         content = self.ReadBytes(content_size)
    116         if name == self.STRING_TABLE_ID:
    117             acc = 0
    118             names = content.split(self.STRING_TABLE_TERMINATOR)
    119             for string in names:
    120                 self._string_table[acc] = string
    121                 acc += len(string) + len(self.STRING_TABLE_TERMINATOR)
    122         elif name != self.SYM_TABLE_ID:
    123             if name.endswith(self.FILE_ID_TERMINATOR):
    124                 name = name[:-len(self.FILE_ID_TERMINATOR)]
    125             elif name.startswith(self.FILE_ID_TERMINATOR):
    126                 offset = int(name[len(self.FILE_ID_TERMINATOR):])
    127                 if offset not in self._string_table:
    128                     raise ValueError('Offset %s not in string table.', offset)
    129                 name = self._string_table[offset]
    130             self.files[name] = content
    131