Home | History | Annotate | Download | only in android
      1 # Copyright (c) 2013 The WebM project authors. All Rights Reserved.
      2 #
      3 # Use of this source code is governed by a BSD-style license
      4 # that can be found in the LICENSE file in the root of the source
      5 # tree. An additional intellectual property rights grant can be found
      6 # in the file PATENTS.  All contributing project authors may
      7 # be found in the AUTHORS file in the root of the source tree.
      8 #
      9 # This simple script pulls test files from the webm homepage
     10 # It is intelligent enough to only pull files if
     11 #   1) File / test_data folder does not exist
     12 #   2) SHA mismatch
     13 
     14 import pycurl
     15 import csv
     16 import hashlib
     17 import re
     18 import os.path
     19 import time
     20 import itertools
     21 import sys
     22 import getopt
     23 
     24 #globals
     25 url = ''
     26 file_list_path = ''
     27 local_resource_path = ''
     28 
     29 # Helper functions:
     30 # A simple function which returns the sha hash of a file in hex
     31 def get_file_sha(filename):
     32   try:
     33     sha_hash = hashlib.sha1()
     34     with open(filename, 'rb') as file:
     35       buf = file.read(HASH_CHUNK)
     36       while len(buf) > 0:
     37         sha_hash.update(buf)
     38         buf = file.read(HASH_CHUNK)
     39       return sha_hash.hexdigest()
     40   except IOError:
     41     print "Error reading " + filename
     42 
     43 # Downloads a file from a url, and then checks the sha against the passed
     44 # in sha
     45 def download_and_check_sha(url, filename, sha):
     46   path = os.path.join(local_resource_path, filename)
     47   fp = open(path, "wb")
     48   curl = pycurl.Curl()
     49   curl.setopt(pycurl.URL, url + "/" + filename)
     50   curl.setopt(pycurl.WRITEDATA, fp)
     51   curl.perform()
     52   curl.close()
     53   fp.close()
     54   return get_file_sha(path) == sha
     55 
     56 #constants
     57 ftp_retries = 3
     58 
     59 SHA_COL = 0
     60 NAME_COL = 1
     61 EXPECTED_COL = 2
     62 HASH_CHUNK = 65536
     63 
     64 # Main script
     65 try:
     66   opts, args = \
     67       getopt.getopt(sys.argv[1:], \
     68                     "u:i:o:", ["url=", "input_csv=", "output_dir="])
     69 except:
     70   print 'get_files.py -u <url> -i <input_csv> -o <output_dir>'
     71   sys.exit(2)
     72 
     73 for opt, arg in opts:
     74   if opt == '-u':
     75     url = arg
     76   elif opt in ("-i", "--input_csv"):
     77     file_list_path = os.path.join(arg)
     78   elif opt in ("-o", "--output_dir"):
     79     local_resource_path = os.path.join(arg)
     80 
     81 if len(sys.argv) != 7:
     82   print "Expects two paths and a url!"
     83   exit(1)
     84 
     85 if not os.path.isdir(local_resource_path):
     86   os.makedirs(local_resource_path)
     87 
     88 file_list_csv = open(file_list_path, "rb")
     89 
     90 # Our 'csv' file uses multiple spaces as a delimiter, python's
     91 # csv class only uses single character delimiters, so we convert them below
     92 file_list_reader = csv.reader((re.sub(' +', ' ', line) \
     93     for line in file_list_csv), delimiter = ' ')
     94 
     95 file_shas = []
     96 file_names = []
     97 
     98 for row in file_list_reader:
     99   if len(row) != EXPECTED_COL:
    100       continue
    101   file_shas.append(row[SHA_COL])
    102   file_names.append(row[NAME_COL])
    103 
    104 file_list_csv.close()
    105 
    106 # Download files, only if they don't already exist and have correct shas
    107 for filename, sha in itertools.izip(file_names, file_shas):
    108   path = os.path.join(local_resource_path, filename)
    109   if os.path.isfile(path) \
    110       and get_file_sha(path) == sha:
    111     print path + ' exists, skipping'
    112     continue
    113   for retry in range(0, ftp_retries):
    114     print "Downloading " + path
    115     if not download_and_check_sha(url, filename, sha):
    116       print "Sha does not match, retrying..."
    117     else:
    118       break
    119