#!/usr/bin/env python
#
# This parser parses the output from Phil Harvey's exiftool (version 9.02)
# and converts it to XML format. It reads exiftool's output from stdin and
# writes the XML to stdout.
#
# In order to get the raw information from exiftool, we need to enable the
# verbose flag (-v2) of exiftool.
#
# Usage:
#      exiftool -v2 img.jpg | ./parser.py >> output.xml
#
# The code is written to run under both Python 2 and Python 3.
#

import os
import sys
import re
from xml.sax.saxutils import escape

# Placeholder emitted for tags that carry no usable value (sub-directory
# pointers and values that exiftool truncated with "[snip]").
NO_VALUE = "NO_VALUE"

# Finds the following two groups of strings:
#
# 1. tag:
#
# | | | x) name = value
# | | |     - Tag 0x1234
#
# 2. IFD indicator:
#
# | | | + [xxx directory with xx entries]
#
# findall() yields 4-tuples: (tag entry, tag "| " prefix,
#                             IFD line, IFD "| " prefix).
_ENTRY_RE = re.compile(
    r"(((?:\| )+)[0-9]*\)"
    r"(?:(?:.*? = .*?)|(?:.*? \(SubDirectory\) -->))"
    r"\n.*?- Tag 0x[0-9a-f]{4})"
    r"|"
    r"(((?:\| )*)\+ \[.*? directory with [0-9]+ entries]$)",
    re.M)

# "x) name = " or "x) name (SubDirectory) -->" at the start of a tag entry.
_NAME_RE = re.compile(r"[0-9]*?\).*?(?:(?: = )|(?: \(SubDirectory\) -->))")
# Parenthesised text that ends the first line of a tag entry.
_PAREN_RE = re.compile(r"\(.*\)\n")
# Everything from the first "=" to the end of the first line.
_ASSIGN_RE = re.compile(r"=.*\n")


def _attr(text):
    """Escape *text* for use inside a double-quoted XML attribute."""
    return escape(text, {'"': "&quot;"})


def _tag_value(name, entry):
    """Return the value to emit for the tag entry *entry* named *name*.

    Sub-directory pointers and "[snip]"-truncated values yield NO_VALUE.
    Otherwise the parenthesised text at the end of the tag line is used when
    present (except for the 'Model' tag), falling back to the text after
    "= ".
    """
    if "(SubDirectory) -->" in entry:
        return NO_VALUE

    parenthesised = _PAREN_RE.search(entry)
    if name != 'Model' and parenthesised:
        # Strip the leading "(" and the trailing ")\n".
        return parenthesised.group(0)[1:-2]

    # Strip the leading "= " and the trailing "\n".
    value = _ASSIGN_RE.search(entry).group(0)[2:-1]
    if "[snip]" in value:
        return NO_VALUE
    return value


def convert(text):
    """Yield the lines of the XML document for exiftool -v2 output *text*.

    The nesting depth of each entry is derived from the number of leading
    "| " markers and is used to maintain a stack of IFD names, so that each
    tag is attributed to the innermost directory that contains it.

    Raises IndexError if a tag entry appears before any IFD indicator.
    """
    yield '<?xml version="1.0" encoding="utf-8"?>'
    yield "<exif>"

    layer = 0
    ifds = []

    for entry, entry_bars, ifd_line, ifd_bars in _ENTRY_RE.findall(text):
        if ifd_line:
            # IFD indicator: skip the "| " prefix and the "+ [" (3 chars),
            # then take the first word as the directory name.
            ifd = ifd_line[len(ifd_bars) + 3:].split()[0]
            # Floor division keeps the depth an int on Python 3 as well.
            new_layer = len(ifd_bars) // 2 + 1
            if new_layer > layer:
                ifds.append(ifd)
            else:
                for _ in range(layer - new_layer):
                    ifds.pop()
                ifds[-1] = ifd
            layer = new_layer
            continue

        # Tag entry: leave any directories that are deeper than this tag
        # (range() of a non-positive count is empty).
        new_layer = len(entry_bars) // 2
        for _ in range(layer - new_layer):
            ifds.pop()
        layer = new_layer

        # The matched entry always ends with the "0x...." of its "- Tag"
        # line; reading it from the end avoids mistaking a hex-looking
        # value on the tag line for the ID.
        tag_id = entry[-6:]

        name = _NAME_RE.search(entry).group(0).split()[1]
        value = _tag_value(name, entry)

        # Escape text so that values containing &, < or > (or a double
        # quote in an attribute) still produce well-formed XML.
        yield ('    <tag ifd="' + _attr(ifds[-1]) + '" id="'
               + _attr(tag_id) + '" name="' + _attr(name) + '">'
               + escape(value) + "</tag>")

    yield "</exif>"


def main():
    """Convert exiftool -v2 output read from stdin to XML on stdout."""
    for line in convert(sys.stdin.read()):
        print(line)


if __name__ == "__main__":
    main()