Home | History | Annotate | Download | only in docs
      1 #!/usr/bin/python
      2 
      3 #
      4 # Copyright (C) 2012 The Android Open Source Project
      5 #
      6 # Licensed under the Apache License, Version 2.0 (the "License");
      7 # you may not use this file except in compliance with the License.
      8 # You may obtain a copy of the License at
      9 #
     10 #      http://www.apache.org/licenses/LICENSE-2.0
     11 #
     12 # Unless required by applicable law or agreed to in writing, software
     13 # distributed under the License is distributed on an "AS IS" BASIS,
     14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 # See the License for the specific language governing permissions and
     16 # limitations under the License.
     17 #
     18 
     19 """
     20 Usage:
     21   metadata_validate.py <filename.xml>
     22   - validates that the metadata properties defined in filename.xml are
     23     semantically correct.
     24   - does not do any XSD validation, use xmllint for that (in metadata-validate)
     25 
     26 Module:
     27   A set of helpful functions for dealing with BeautifulSoup element trees.
     28   Especially the find_* and fully_qualified_name functions.
     29 
     30 Dependencies:
     31   BeautifulSoup - an HTML/XML parser available to download from
     32                   http://www.crummy.com/software/BeautifulSoup/
     33 """
     34 
     35 from bs4 import BeautifulSoup
     36 from bs4 import Tag
     37 import sys
     38 
     39 
     40 #####################
     41 #####################
     42 
     43 def fully_qualified_name(entry):
     44   """
     45   Calculates the fully qualified name for an entry by walking the path
     46   to the root node.
     47 
     48   Args:
     49     entry: a BeautifulSoup Tag corresponding to an <entry ...> XML node
     50 
     51   Returns:
     52     A string with the full name, e.g. "android.lens.info.availableApertureSizes"
     53   """
     54   filter_tags = ['namespace', 'section']
     55   parents = [i['name'] for i in entry.parents if i.name in filter_tags]
     56 
     57   name = entry['name']
     58 
     59   parents.reverse()
     60   parents.append(name)
     61 
     62   fqn = ".".join(parents)
     63 
     64   return fqn
     65 
     66 def find_parent_by_name(element, names):
     67   """
     68   Find the ancestor for an element whose name matches one of those
     69   in names.
     70 
     71   Args:
     72     element: A BeautifulSoup Tag corresponding to an XML node
     73 
     74   Returns:
     75     A BeautifulSoup element corresponding to the matched parent, or None.
     76 
     77     For example, assuming the following XML structure:
     78       <static>
     79         <anything>
     80           <entry name="Hello" />   # this is in variable 'Hello'
     81         </anything>
     82       </static>
     83 
     84       el = find_parent_by_name(Hello, ['static'])
     85       # el is now a value pointing to the '<static>' element
     86   """
     87   matching_parents = [i.name for i in element.parents if i.name in names]
     88 
     89   if matching_parents:
     90     return matching_parents[0]
     91   else:
     92     return None
     93 
     94 def find_all_child_tags(element, tag):
     95     """
     96     Finds all the children that are a Tag (as opposed to a NavigableString),
     97     with a name of tag. This is useful to filter out the NavigableString out
     98     of the children.
     99 
    100     Args:
    101       element: A BeautifulSoup Tag corresponding to an XML node
    102       tag: A string representing the name of the tag
    103 
    104     Returns:
    105       A list of Tag instances
    106 
    107       For example, given the following XML structure:
    108         <enum>                    # This is the variable el
    109           Hello world             # NavigableString
    110           <value>Apple</value>    # this is the variale apple (Tag)
    111           <value>Orange</value>   # this is the variable orange (Tag)
    112           Hello world again       # NavigableString
    113         </enum>
    114 
    115         lst = find_all_child_tags(el, 'value')
    116         # lst is [apple, orange]
    117 
    118     """
    119     matching_tags = [i for i in element.children if isinstance(i, Tag) and i.name == tag]
    120     return matching_tags
    121 
    122 def find_child_tag(element, tag):
    123     """
    124     Finds the first child that is a Tag with the matching name.
    125 
    126     Args:
    127       element: a BeautifulSoup Tag
    128       tag: A String representing the name of the tag
    129 
    130     Returns:
    131       An instance of a Tag, or None if there was no matches.
    132 
    133       For example, given the following XML structure:
    134         <enum>                    # This is the variable el
    135           Hello world             # NavigableString
    136           <value>Apple</value>    # this is the variale apple (Tag)
    137           <value>Orange</value>   # this is the variable orange (Tag)
    138           Hello world again       # NavigableString
    139         </enum>
    140 
    141         res = find_child_tag(el, 'value')
    142         # res is apple
    143     """
    144     matching_tags = find_all_child_tags(element, tag)
    145     if matching_tags:
    146         return matching_tags[0]
    147     else:
    148         return None
    149 
    150 def find_kind(element):
    151   """
    152   Finds the kind Tag ancestor for an element.
    153 
    154   Args:
    155     element: a BeautifulSoup Tag
    156 
    157   Returns:
    158     a BeautifulSoup tag, or None if there was no matches
    159 
    160   Remarks:
    161     This function only makes sense to be called for an Entry, Clone, or
    162     InnerNamespace XML types. It will always return 'None' for other nodes.
    163   """
    164   kinds = ['dynamic', 'static', 'controls']
    165   parent_kind = find_parent_by_name(element, kinds)
    166   return parent_kind
    167 
    168 def validate_error(msg):
    169   """
    170   Print a validation error to stderr.
    171 
    172   Args:
    173     msg: a string you want to be printed
    174   """
    175   print >> sys.stderr, "Validation error: " + msg
    176 
    177 
    178 def validate_clones(soup):
    179   """
    180   Validate that all <clone> elements point to an existing <entry> element.
    181 
    182   Args:
    183     soup - an instance of BeautifulSoup
    184 
    185   Returns:
    186     True if the validation succeeds, False otherwise
    187   """
    188   success = True
    189 
    190   for clone in soup.find_all("clone"):
    191     clone_entry = clone['entry']
    192     clone_kind = clone['kind']
    193 
    194     parent_kind = find_kind(clone)
    195 
    196     find_entry = lambda x: x.name == 'entry'                           \
    197                        and find_kind(x) == clone_kind                  \
    198                        and fully_qualified_name(x) == clone_entry
    199     matching_entry = soup.find(find_entry)
    200 
    201     if matching_entry is None:
    202       error_msg = ("Did not find corresponding clone entry '%s' " +    \
    203                "with kind '%s'") %(clone_entry, clone_kind)
    204       validate_error(error_msg)
    205       success = False
    206 
    207   return success
    208 
    209 # All <entry> elements with container=$foo have a <$foo> child
    210 # If type="enum", <enum> tag is present
    211 # In <enum> for all <value id="$x">, $x is numeric
    212 def validate_entries(soup):
    213   """
    214   Validate all <entry> elements with the following rules:
    215     * If there is a container="$foo" attribute, there is a <$foo> child
    216     * If there is a type="enum" attribute, there is an <enum> child
    217     * In the <enum> child, all <value id="$x"> have a numeric $x
    218 
    219   Args:
    220     soup - an instance of BeautifulSoup
    221 
    222   Returns:
    223     True if the validation succeeds, False otherwise
    224   """
    225   success = True
    226   for entry in soup.find_all("entry"):
    227     entry_container = entry.attrs.get('container')
    228 
    229     if entry_container is not None:
    230       container_tag = entry.find(entry_container)
    231 
    232       if container_tag is None:
    233         success = False
    234         validate_error(("Entry '%s' in kind '%s' has type '%s' but " +  \
    235                  "missing child element <%s>")                          \
    236                  %(fully_qualified_name(entry), find_kind(entry),       \
    237                  entry_container, entry_container))
    238 
    239     enum = entry.attrs.get('enum')
    240     if enum and enum == 'true':
    241       if entry.enum is None:
    242         validate_error(("Entry '%s' in kind '%s' is missing enum")     \
    243                                % (fully_qualified_name(entry), find_kind(entry),
    244                                   ))
    245         success = False
    246 
    247       else:
    248         for value in entry.enum.find_all('value'):
    249           value_id = value.attrs.get('id')
    250 
    251           if value_id is not None:
    252             try:
    253               id_int = int(value_id, 0) #autoguess base
    254             except ValueError:
    255               validate_error(("Entry '%s' has id '%s', which is not" + \
    256                                         " numeric.")                   \
    257                              %(fully_qualified_name(entry), value_id))
    258               success = False
    259     else:
    260       if entry.enum:
    261         validate_error(("Entry '%s' kind '%s' has enum el, but no enum attr")  \
    262                                % (fully_qualified_name(entry), find_kind(entry),
    263                                   ))
    264         success = False
    265 
    266   return success
    267 
    268 def validate_xml(file_name):
    269   """
    270   Validate all XML nodes according to the rules in validate_clones and
    271   validate_entries.
    272 
    273   Args:
    274     file_name - a string path to an XML file we wish to validate
    275 
    276   Returns:
    277     a BeautifulSoup instance if validation succeeds, None otherwise
    278   """
    279 
    280   xml = file(file_name).read()
    281   soup = BeautifulSoup(xml, features='xml')
    282 
    283   succ = validate_clones(soup)
    284   succ = validate_entries(soup) and succ
    285 
    286   if succ:
    287     return soup
    288   else:
    289     return None
    290 
    291 #####################
    292 #####################
    293 
    294 if __name__ == "__main__":
    295   if len(sys.argv) <= 1:
    296     print >> sys.stderr, "Usage: %s <filename.xml>" % (sys.argv[0])
    297     sys.exit(0)
    298 
    299   file_name = sys.argv[1]
    300   succ = validate_xml(file_name) is not None
    301 
    302   if succ:
    303     print "%s: SUCCESS! Document validated" %(file_name)
    304     sys.exit(0)
    305   else:
    306     print >> sys.stderr, "%s: ERRORS: Document failed to validate" %(file_name)
    307     sys.exit(1)
    308