1 #!/usr/bin/python 2 # 3 # Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 4 # Use of this source code is governed by a BSD-style license that can be 5 # found in the LICENSE file. 6 7 """Module to upload a MySQL dump file to Cloud SQL. 8 9 Usage: 10 dump_to_cloudsql.py [-h] [--resume NUM] [--user USER] [--passwd PASSWD] FILE 11 [REMOTE] 12 13 Uploads MySQL dump file to a MySQL database or Cloud SQL. With no optional 14 arguments will connect to localhost as root with an empty password. 15 16 positional arguments: 17 FILE text dump file containing MySQL commands 18 REMOTE Cloud SQL instance name or MySQL hostname 19 20 optional arguments: 21 -h, --help show this help message and exit 22 --resume NUM resume dump at command NUM 23 --user USER user (ignored for CloudSQL) 24 --passwd PASSWD passwd (ignored for CloudSQL) 25 """ 26 27 from __future__ import division 28 import argparse 29 import collections 30 import datetime 31 import os 32 import re 33 import sys 34 import time 35 36 37 BYTES_PER_GB = 2**30 38 39 40 class MySQLConnectionManager(object): 41 """Manages connections to a MySQL database. 42 43 Vars: 44 factory: A *ConnectionFactory. 45 connected: Whether we currently hold a live DB connection. 46 cmd_num: The number of commands executed. 47 """ 48 def __init__(self, connection_factory): 49 self.factory = connection_factory 50 self.connected = False 51 self.cmd_num = 0 52 53 def write(self, data, execute_cmd=True, increment_cmd=False): 54 """Buffers writes to command boundaries. 55 56 Args: 57 data: A line of data from the MySQL dump. 58 execute_cmd: Whether to execute the command, defaults to True. 59 increment_cmd: Whether to increment cmd_num, defaults to False. 60 """ 61 if not data or not data.strip() or data == '\n' or data[:2] == '--': 62 return 63 self._cmd += data[:-1] if data[-1] == '\n' else data 64 if self._cmd[-1] != ';': 65 return 66 # Execute command. 67 if execute_cmd: 68 self._cursor.execute(self._cmd.decode('utf-8')) 69 self._cmd = '' 70 if increment_cmd: 71 self.cmd_num += 1 72 73 def disconnect(self): 74 """Closes the current database connection.""" 75 if self.connected: 76 self.connected = False 77 self._cursor.close() 78 self._db.close() 79 80 def connect(self): 81 """Creates a new database connection.""" 82 self.disconnect() 83 self._db = self.factory.connect() 84 self.connected = True 85 self._cursor = self._db.cursor() 86 self._cmd = '' 87 88 89 class CloudSQLConnectionFactory(object): 90 """Creates Cloud SQL database connections.""" 91 def __init__(self, cloudsql_instance): 92 self._instance = cloudsql_instance 93 94 def connect(self): 95 """Connects to the Cloud SQL database and returns the connection. 96 97 Returns: 98 A MySQLdb compatible database connection to the Cloud SQL instance. 99 """ 100 print 'Connecting to Cloud SQL instance %s.' % self._instance 101 try: 102 from google.storage.speckle.python.api import rdbms_googleapi 103 except ImportError: 104 sys.exit('Unable to import rdbms_googleapi. Add the AppEngine SDK ' 105 'directory to your PYTHONPATH. Download the SDK from: ' 106 'https://developers.google.com/appengine/downloads') 107 return rdbms_googleapi.connect(None, instance=self._instance) 108 109 110 class LocalSQLConnectionFactory(object): 111 """Creates local MySQL database connections.""" 112 def __init__(self, host=None, user='root', passwd=''): 113 if not host: 114 host = 'localhost' 115 self._host = host 116 self._user = user 117 self._passwd = passwd 118 119 def connect(self): 120 """Connects to the local MySQL database and returns the connection. 121 122 Returns: 123 A MySQLdb database connection to the local MySQL database. 124 """ 125 print 'Connecting to mysql at localhost as %s.' % self._user 126 try: 127 import MySQLdb 128 except ImportError: 129 sys.exit('Unable to import MySQLdb. To install on Ubuntu: ' 130 'apt-get install python-mysqldb') 131 return MySQLdb.connect(host=self._host, user=self._user, 132 passwd=self._passwd) 133 134 135 class MySQLState(object): 136 """Maintains the MySQL global state. 137 138 This is a hack that keeps record of all MySQL lines that set global state. 139 These are needed to reconstruct the MySQL state on resume. 140 """ 141 _set_regex = re.compile('\S*\s*SET(.*)[\s=]') 142 143 def __init__(self): 144 self._db_line = '' 145 self._table_lock = [] 146 self._sets = collections.OrderedDict() 147 148 def process(self, line): 149 """Check and save lines that affect the global state. 150 151 Args: 152 line: A line from the MySQL dump file. 153 """ 154 # Most recent USE line. 155 if line[:3] == 'USE': 156 self._db_line = line 157 # SET variables. 158 m = self._set_regex.match(line) 159 if m: 160 self._sets[m.group(1).strip()] = line 161 # Maintain LOCK TABLES 162 if (line[:11] == 'LOCK TABLES' or 163 ('ALTER TABLE' in line and 'DISABLE KEYS' in line)): 164 self._table_lock.append(line) 165 if (line[:14] == 'UNLOCK TABLES;'): 166 self._table_lock = [] 167 168 def write(self, out): 169 """Print lines to recreate the saved state. 170 171 Args: 172 out: A File-like object to write out saved state. 173 """ 174 out.write(self._db_line) 175 for v in self._sets.itervalues(): 176 out.write(v) 177 for l in self._table_lock: 178 out.write(l) 179 180 def breakpoint(self, line): 181 """Returns true if we can handle breaking after this line. 182 183 Args: 184 line: A line from the MySQL dump file. 185 186 Returns: 187 Boolean indicating whether we can break after |line|. 188 """ 189 return (line[:28] == '-- Table structure for table' or 190 line[:11] == 'INSERT INTO') 191 192 193 def dump_to_cloudsql(dumpfile, manager, cmd_offset=0): 194 """Dumps a MySQL dump file to a database through a MySQLConnectionManager. 195 196 Args: 197 dumpfile: Path to a file from which to read the MySQL dump. 198 manager: An instance of MySQLConnectionManager. 199 cmd_offset: No commands will be executed on the database before this count 200 is reached. Used to continue an uncompleted dump. Defaults to 0. 201 """ 202 state = MySQLState() 203 total = os.path.getsize(dumpfile) 204 start_time = time.time() 205 line_num = 0 206 with open(dumpfile, 'r') as dump: 207 for line in dump: 208 line_num += 1 209 if not manager.connected: 210 manager.connect() 211 try: 212 # Construct commands from lines and execute them. 213 state.process(line) 214 if manager.cmd_num == cmd_offset and cmd_offset != 0: 215 print '\nRecreating state at line: %d' % line_num 216 state.write(manager) 217 manager.write(line, manager.cmd_num >= cmd_offset, True) 218 # Print status. 219 sys.stdout.write( 220 '\rstatus: %.3f%% %0.2f GB %d commands ' % 221 (100 * dump.tell() / total, dump.tell() / BYTES_PER_GB, 222 manager.cmd_num)) 223 sys.stdout.flush() 224 # Handle interrupts and connection failures. 225 except KeyboardInterrupt: 226 print ('\nInterrupted while executing command: %d' % 227 manager.cmd_num) 228 raise 229 except: 230 print '\nFailed while executing command: %d' % manager.cmd_num 231 delta = int(time.time() - start_time) 232 print 'Total time: %s' % str(datetime.timedelta(seconds=delta)) 233 if state.breakpoint(line): 234 # Attempt to resume. 235 print ('Execution can resume from here (line = %d)' % 236 line_num) 237 manager.cmd_num += 1 238 cmd_offset = manager.cmd_num 239 print ('Will now attempt to auto-resume at command: %d' % 240 cmd_offset) 241 manager.disconnect() 242 else: 243 print 'Execution may fail to resume correctly from here.' 244 print ('Use --resume=%d to attempt to resume the dump.' % 245 manager.cmd_num) 246 raise 247 print '\nDone.' 248 249 250 if __name__ == '__main__': 251 """Imports a MySQL database from a dump file. 252 253 Interprets command line arguments and calls dump_to_cloudsql appropriately. 254 """ 255 description = """Uploads MySQL dump file to a MySQL database or Cloud SQL. 256 With no optional arguments will connect to localhost as root 257 with an empty password.""" 258 parser = argparse.ArgumentParser(description=description) 259 parser.add_argument('mysqldump', metavar='FILE', 260 help='text dump file containing MySQL commands') 261 parser.add_argument('remote', default=None, nargs='?', metavar='REMOTE', 262 help='either a Cloud SQL account:instance or a hostname') 263 parser.add_argument('--resume', default=0, type=int, metavar='NUM', 264 help='resume dump at command NUM') 265 parser.add_argument('--user', default='root', metavar='USER', 266 help='user (ignored for Cloud SQL)') 267 parser.add_argument('--passwd', default='', metavar='PASSWD', 268 help='passwd (ignored for Cloud SQL)') 269 args = parser.parse_args() 270 if args.remote and ':' in args.remote: 271 connection = CloudSQLConnectionFactory(args.remote) 272 else: 273 connection = LocalSQLConnectionFactory(args.remote, args.user, 274 args.passwd) 275 if args.resume: 276 print 'Resuming execution at command: %d' % options.resume 277 dump_to_cloudsql(args.mysqldump, MySQLConnectionManager(connection), 278 args.resume) 279