#!/usr/bin/env python
#
# This parser parses the output from Phil Harvey's exiftool (version 9.02)
# and converts it to XML. It reads exiftool's output from stdin and writes
# the XML document to stdout.
#
# In order to get the raw information from exiftool, we need to enable the
# verbose flag (-v2) of exiftool.
#
# Usage:
#   exiftool -v2 img.jpg | ./parser.py >> output.xml
#
# The code below is written to run unchanged on both Python 2 and Python 3.
#

import os
import sys
import re
from xml.sax.saxutils import escape

# Placeholder emitted when a tag has no usable raw value (sub-directory
# pointers and values that exiftool truncated with "[snip]").
NO_VALUE = "NO_VALUE"

# Matches the following two kinds of entries:
#
# 1. tag (groups 1 and 2: whole entry, "| " nesting prefix):
#
#    | | | x) name = value
#    | | |     - Tag 0x1234
#
# 2. IFD indicator (groups 3 and 4: whole line, "| " nesting prefix):
#
#    | | | + [xxx directory with xx entries]
#
_ENTRY_RE = re.compile(
    r"(((?:\| )+)[0-9]*\)(?:(?:.*? = .*?)|(?:.*? \(SubDirectory\) -->))\n.*?- Tag 0x[0-9a-f]{4})"
    r"|"
    r"(((?:\| )*)\+ \[.*? directory with [0-9]+ entries]$)",
    re.M)

# "x) name = " or "x) name (SubDirectory) -->" at the start of a tag entry.
_NAME_RE = re.compile(r"[0-9]*?\).*?(?:(?: = )|(?: \(SubDirectory\) -->))")
# Marks a tag that only points at a nested directory.
_SUBDIR_RE = re.compile(r"\(SubDirectory\) -->")
# Raw value printed in parentheses at the very end of the name line.
_PAREN_VALUE_RE = re.compile(r"\(.*\)\n")
# Everything from the first "=" to the end of the name line.
_ASSIGN_RE = re.compile(r"=.*\n")


def _attr(text):
    """Escape *text* for use inside a double-quoted XML attribute."""
    return escape(text, {'"': "&quot;"})


def _tag_name(entry):
    """Return the tag name, i.e. the token following the "x)" index."""
    return _NAME_RE.search(entry).group(0).split()[1]


def _tag_value(entry, name):
    """Return the raw value of a tag entry, or NO_VALUE if it has none."""
    if _SUBDIR_RE.search(entry):
        return NO_VALUE

    in_parens = _PAREN_VALUE_RE.search(entry)
    # 'Model' is excluded from the parenthesised form: its text after "="
    # is always taken whole, even when it ends with a ")".
    if name != 'Model' and in_parens:
        # Strip the leading "(" and the trailing ")\n".
        return in_parens.group(0)[1:-2]

    # Strip the leading "= " and the trailing "\n".
    value = _ASSIGN_RE.search(entry).group(0)[2:-1]
    if "[snip]" in value:
        return NO_VALUE
    return value


def _format_tag(ifd, entry):
    """Render one matched tag entry as a <tag> element line."""
    # Every tag entry ends with "- Tag 0xNNNN", so the ID is its last six
    # characters. Searching for the first "0x...." instead would pick up a
    # value that happens to look like a hex number.
    tag_id = entry[-6:]
    name = _tag_name(entry)
    value = _tag_value(entry, name)
    return (' <tag ifd="' + _attr(ifd) + '" id="'
            + _attr(tag_id) + '" name="' + _attr(name) + '">'
            + escape(value) + "</tag>")


def convert(text):
    """Convert the text printed by ``exiftool -v2`` into an XML document.

    text -- the complete verbose output of exiftool, as one string.

    Returns the XML document as a string terminated by a newline: an XML
    declaration, then an <exif> element holding one <tag> element per
    recognised tag, in input order. Each <tag> carries the name of the
    innermost enclosing directory (``ifd``), the tag ID (``id``) and the tag
    name (``name``) as attributes and the raw value as text. Attribute and
    text content is XML-escaped. A tag that is not inside any recognised
    directory gets an empty ``ifd`` attribute.
    """
    lines = ['<?xml version="1.0" encoding="utf-8"?>', "<exif>"]

    # Stack of (layer, directory name) for the directories enclosing the
    # current position. Storing the layer with each entry keeps the stack
    # consistent even when the input skips a nesting level.
    open_ifds = []

    for entry, tag_prefix, ifd_line, ifd_prefix in _ENTRY_RE.findall(text):
        if ifd_line:
            # IFD indicator: each "| " in the prefix is one nesting level,
            # and the directory's own entries live one level deeper.
            layer = len(ifd_prefix) // 2 + 1
            # Skip the prefix and the "+ [" that precede the name.
            ifd_name = ifd_line[len(ifd_prefix) + 3:].split()[0]
            # Close sibling and deeper directories before opening this one.
            while open_ifds and open_ifds[-1][0] >= layer:
                open_ifds.pop()
            open_ifds.append((layer, ifd_name))
            continue

        layer = len(tag_prefix) // 2
        # Leaving nested directories: drop everything deeper than the tag.
        while open_ifds and open_ifds[-1][0] > layer:
            open_ifds.pop()
        ifd = open_ifds[-1][1] if open_ifds else ""
        lines.append(_format_tag(ifd, entry))

    lines.append("</exif>")
    return "\n".join(lines) + "\n"


def main():
    """Read exiftool output from stdin and write the XML to stdout."""
    sys.stdout.write(convert(sys.stdin.read()))


if __name__ == "__main__":
    main()