diff --git a/README.rst b/README.rst index 9ca9466..a799449 100644 --- a/README.rst +++ b/README.rst @@ -121,6 +121,9 @@ Options keys/files whose size is 0. Warning: S3/GS often uses empty keys with special trailing characters to specify directories. + --exclude Exclude files matching the specified pattern. + --size-only Only compare size of files in deciding what to transfer, + ignoring last-modified. --delete Delete extraneous files from destination dirs after the transfer has finished (e.g. rsync's --delete- after). @@ -151,7 +154,6 @@ http://boto.cloudhackers.com/en/latest/boto_config_tut.html Known Issues and Limitations ================================================================================ -* Differences between keys/files are assumed *only* by checking the size. * Due to the nature of how directories work in S3/GS, some non-standard folder structures might not transfer correctly. Empty directories may also be overlooked in some cases. When in doubt, use "-n" first. @@ -159,6 +161,10 @@ Known Issues and Limitations on some systems. See the "--glob" option's help text for more info. * At this time, the script does not take advantage of boto's "multipart" transfer methods. (pull requests welcome!) +* The last-modified time on the cloud platform may actually be the upload time. + The comparison attempts to preserve the more recent copy (similar to + ``rsync --update``). You can use the ``--size-only`` option if this discrepancy + is problematic. Disclaimers and Warnings diff --git a/bin/boto-rsync b/bin/boto-rsync index 76278b2..42e4297 100644 --- a/bin/boto-rsync +++ b/bin/boto-rsync @@ -24,6 +24,7 @@ import sys, os, time, datetime, argparse, threading, signal from fnmatch import fnmatch import boto +from boto.utils import parse_ts __version__ = '0.8.1' @@ -209,6 +210,10 @@ def main(): help='Specify a specific S3 endpoint to connect to via boto\'s ' + \ '"host" connection argument (S3 only).' 
) + parser.add_argument( + '--exclude', action='append', default=[], + help='Exclude files matching the specified pattern.' + ) parser.add_argument( '-g', '--grant', help='A canned ACL policy that will be granted on each file ' + \ @@ -271,6 +276,11 @@ def main(): 'size is 0. Warning: S3/GS often uses empty keys with special ' + \ 'trailing characters to specify directories.' ) + parser.add_argument( + '--size-only', action='store_true', + help='Only compare size of files in deciding what to transfer, ' + \ + 'ignoring last-modified.' + ) parser.add_argument( '--delete', action='store_true', help='Delete extraneous files from destination dirs after the ' + \ @@ -313,6 +323,7 @@ def main(): cloud_secret_access_key = args.cloud_secret_access_key anon = args.anon endpoint = args.endpoint + exclude = args.exclude grant = args.grant metadata = args.metadata if not isinstance(metadata, dict): @@ -325,6 +336,7 @@ def main(): no_recurse = args.no_recurse or glob skip_dirkeys = args.skip_dirkeys ignore_empty = args.ignore_empty + size_only = args.size_only delete = args.delete no_op = args.no_op quiet = args.quiet @@ -457,7 +469,7 @@ def main(): if glob and not fnmatch(key.name.split('/')[-1], glob): continue - keys[key.name] = key.size + keys[key.name] = {'size': key.size, 'modified': parse_ts(key.last_modified)} except Exception, e: raise e finally: @@ -484,6 +496,21 @@ def main(): else: key_name = cloud_path + get_key_name(root, path) + '/' + # Skip whole directory if matches exclude argument(s) + # (Still checks subdirectories, but saves a little time and verbosity.) 
+ excludeDir = False + for excludePath in exclude: + if fnmatch(root+os.sep, excludePath): + excludeDir = True + break + if excludeDir: + if not quiet: + sys.stdout.write( + 'Skipping %s (excluded path)\n' % + root + ) + continue + if ignore_empty and not files: if not quiet: sys.stdout.write( @@ -503,7 +530,7 @@ def main(): key_name.endswith('_$folder$'): if not quiet: sys.stdout.write( - 'Skipping %s (size matches)\n' % + 'Skipping %s (directory already exists)\n' % key_name.replace('_$folder$', '/') ) create_dirkey = False @@ -529,7 +556,7 @@ def main(): reduced_redundancy=reduced, encrypt_key=encrypt ) - keys[key_name] = 0 + keys[key_name] = {'size': 0, 'modified': datetime.datetime.now()} # Clean stdout sys.stdout.write('\n') @@ -542,6 +569,20 @@ def main(): key_name = cloud_path + get_key_name(fullpath, path) file_size = os.path.getsize(fullpath) + # determine if the file should be excluded according to command line arguments. + excludeFile = False + for excludePath in exclude: + if fnmatch(fullpath, excludePath): + excludeFile = True + break + if excludeFile: + if not quiet: + sys.stdout.write( + 'Skipping %s (excluded path)\n' % + fullpath[len(path):].lstrip(os.sep) + ) + continue + if file_size == 0: if ignore_empty: if not quiet: @@ -559,13 +600,22 @@ def main(): fullpath[len(path):].lstrip(os.sep) ) continue - elif keys[key_name] == file_size: - if not quiet: - sys.stdout.write( - 'Skipping %s (size matches)\n' % - fullpath[len(path):].lstrip(os.sep) - ) - continue + elif keys[key_name]['size'] == file_size: + if size_only: + if not quiet: + sys.stdout.write( + 'Skipping %s (size matches)\n' % + fullpath[len(path):].lstrip(os.sep) + ) + continue + # Compare last modified + elif datetime.datetime.fromtimestamp(os.path.getmtime(fullpath)) <= keys[key_name]['modified']: + if not quiet: + sys.stdout.write( + 'Skipping %s (not modified since last upload)\n' % + fullpath[len(path):].lstrip(os.sep) + ) + continue sys.stdout.write( '%s\n' % @@ -589,7 +639,7 @@ 
def main(): policy=grant, reduced_redundancy=reduced, encrypt_key=encrypt ) - keys[key_name] = file_size + keys[key_name] = {'size': file_size, 'modified': datetime.datetime.now()} # Clean stdout sys.stdout.write('\n') @@ -621,7 +671,7 @@ def main(): if key_name in keys: del(keys[key_name]) - for key_name, key_size in keys.iteritems(): + for key_name, key_meta in keys.iteritems(): sys.stdout.write( 'deleting %s\n' % key_name[len(cloud_path):].replace('_$folder$', '/') @@ -664,16 +714,23 @@ def main(): sys.stdout.write('Skipping %s (not overwriting)\n' % filename) elif key.size == file_size: - copy_file = False - if not quiet: - if filename != key_name.split('/')[-1]: - sys.stdout.write( - 'Skipping %s -> %s (size matches)\n' % - filename, key_name.split('/')[-1] - ) - else: - sys.stdout.write('Skipping %s (size matches)\n' % - filename) + skip_reason = '' + if size_only: + skip_reason = 'size matches' + # Compare last modified + elif datetime.datetime.fromtimestamp(os.path.getmtime(path)) <= parse_ts(key.last_modified): + skip_reason = 'not modified since last upload' + if skip_reason != '': + copy_file = False + if not quiet: + if filename != key_name.split('/')[-1]: + sys.stdout.write( + 'Skipping %s -> %s (%s)\n' % + (filename, key_name.split('/')[-1], skip_reason) + ) + else: + sys.stdout.write('Skipping %s (%s)\n' % + (filename, skip_reason)) if copy_file: if filename != key_name.split('/')[-1]: @@ -767,19 +824,27 @@ def main(): ) copy_file = False elif key.size == os.path.getsize(fullpath): - if not quiet: - if rename: - sys.stdout.write( - 'Skipping %s -> %s (size matches)\n' % - keypath.replace('/', os.sep), - fullpath.split(os.sep)[-1] - ) - else: - sys.stdout.write( - 'Skipping %s (size matches)\n' % - fullpath.split(os.sep)[-1] - ) - copy_file = False + skip_reason = '' + if size_only: + skip_reason = 'size matches' + # Compare last modified + elif parse_ts(key.last_modified) < datetime.datetime.fromtimestamp(os.path.getmtime(fullpath)): + skip_reason = 
'local copy more recently modified' + if skip_reason != '': + if not quiet: + if rename: + sys.stdout.write( + 'Skipping %s -> %s (%s)\n' % + (keypath.replace('/', os.sep), + fullpath.split(os.sep)[-1], + skip_reason) + ) + else: + sys.stdout.write( + 'Skipping %s (%s)\n' % + (fullpath.split(os.sep)[-1], skip_reason) + ) + copy_file = False if copy_file: if rename: @@ -856,12 +921,27 @@ def main(): fullpath[len(os.path.join(path, '')):] ) continue - elif key.size == os.path.getsize(fullpath) or \ - key.name.endswith('/') or \ + elif key.size == os.path.getsize(fullpath): + if size_only: + if not quiet: + sys.stdout.write( + 'Skipping %s (size matches)\n' % + fullpath[len(os.path.join(path, '')):] + ) + continue + # Compare last modified + elif parse_ts(key.last_modified) < datetime.datetime.fromtimestamp(os.path.getmtime(fullpath)): + if not quiet: + sys.stdout.write( + 'Skipping %s (modified since last upload)\n' % + fullpath[len(path):].lstrip(os.sep) + ) + continue + elif key.name.endswith('/') or \ key.name.endswith('_$folder$'): if not quiet: sys.stdout.write( - 'Skipping %s (size matches)\n' % + 'Skipping %s (directory already exists)\n' % fullpath[len(os.path.join(path, '')):] ) continue @@ -977,17 +1057,24 @@ def main(): ) copy_file = False elif key.size == dest_key.size: - if not quiet: - if rename: - sys.stdout.write( - 'Skipping %s -> %s (size matches)\n' % - keypath.split('/')[-1], fullpath.split('/')[-1] - ) - else: - sys.stdout.write( - 'Skipping %s (size matches)\n' % fullpath - ) - copy_file = False + skip_reason = '' + if size_only: + skip_reason = 'size matches' + # Compare last modified + elif parse_ts(key.last_modified) < parse_ts(dest_key.last_modified): + skip_reason = 'destination more recently modified' + if skip_reason != '': + if not quiet: + if rename: + sys.stdout.write( + 'Skipping %s -> %s (%s)\n' % + (keypath.split('/')[-1], fullpath.split('/')[-1], skip_reason) + ) + else: + sys.stdout.write( + 'Skipping %s (%s)\n' % (fullpath, 
skip_reason) + ) + copy_file = False if copy_file: if rename: @@ -1085,12 +1172,21 @@ def main(): ) continue elif key.size == dest_key.size: - if not quiet: - sys.stdout.write( - 'Skipping %s (size matches)\n' % - fullpath.replace('_$folder$', '/') - ) - continue + if size_only: + if not quiet: + sys.stdout.write( + 'Skipping %s (size matches)\n' % + fullpath.replace('_$folder$', '/') + ) + continue + # Compare last modified + elif parse_ts(key.last_modified) < parse_ts(dest_key.last_modified): + if not quiet: + sys.stdout.write( + 'Skipping %s (destination more recently modified)\n' % + fullpath.replace('_$folder$', '/') + ) + continue sys.stdout.write('%s... ' % keypath.replace('_$folder$', '/')) sys.stdout.flush()