Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added last-modified comparison and --exclude glob syntax #31

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ Options
keys/files whose size is 0. Warning: S3/GS often uses
empty keys with special trailing characters to specify
directories.
--exclude Exclude files whose path matches the specified glob pattern. May be given multiple times.
--size-only Compare only file sizes when deciding what to transfer,
ignoring last-modified times.
--delete Delete extraneous files from destination dirs after
the transfer has finished (e.g. rsync's --delete-
after).
Expand Down Expand Up @@ -151,14 +154,17 @@ http://boto.cloudhackers.com/en/latest/boto_config_tut.html
Known Issues and Limitations
================================================================================

* Differences between keys/files are assumed *only* by checking the size.
* Due to the nature of how directories work in S3/GS, some non-standard folder
structures might not transfer correctly. Empty directories may also be
overlooked in some cases. When in doubt, use "-n" first.
* Simple "globbing" (e.g. ``/path/*.zip``) is supported but may behave strangely
on some systems. See the "--glob" option's help text for more info.
* At this time, the script does not take advantage of boto's "multipart"
transfer methods. (pull requests welcome!)
* The last-modified time reported by the cloud platform may actually be the
time the key was uploaded rather than the file's original modification time.
When sizes match, the comparison attempts to preserve the more recent copy
(similar to ``rsync --update``). Use the ``--size-only`` option if this
discrepancy is problematic.


Disclaimers and Warnings
Expand Down
206 changes: 151 additions & 55 deletions bin/boto-rsync
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import sys, os, time, datetime, argparse, threading, signal
from fnmatch import fnmatch
import boto
from boto.utils import parse_ts

__version__ = '0.8.1'

Expand Down Expand Up @@ -209,6 +210,10 @@ def main():
help='Specify a specific S3 endpoint to connect to via boto\'s ' + \
'"host" connection argument (S3 only).'
)
parser.add_argument(
'--exclude', action='append', default=[],
help='Exclude files matching the specified pattern.'
)
parser.add_argument(
'-g', '--grant',
help='A canned ACL policy that will be granted on each file ' + \
Expand Down Expand Up @@ -271,6 +276,11 @@ def main():
'size is 0. Warning: S3/GS often uses empty keys with special ' + \
'trailing characters to specify directories.'
)
parser.add_argument(
'--size-only', action='store_true',
help='Only compare size of files in deciding what to transfer, ' + \
'ignoring last-modified.'
)
parser.add_argument(
'--delete', action='store_true',
help='Delete extraneous files from destination dirs after the ' + \
Expand Down Expand Up @@ -313,6 +323,7 @@ def main():
cloud_secret_access_key = args.cloud_secret_access_key
anon = args.anon
endpoint = args.endpoint
exclude = args.exclude
grant = args.grant
metadata = args.metadata
if not isinstance(metadata, dict):
Expand All @@ -325,6 +336,7 @@ def main():
no_recurse = args.no_recurse or glob
skip_dirkeys = args.skip_dirkeys
ignore_empty = args.ignore_empty
size_only = args.size_only
delete = args.delete
no_op = args.no_op
quiet = args.quiet
Expand Down Expand Up @@ -457,7 +469,7 @@ def main():
if glob and not fnmatch(key.name.split('/')[-1], glob):
continue

keys[key.name] = key.size
keys[key.name] = {'size': key.size, 'modified': parse_ts(key.last_modified)}
except Exception, e:
raise e
finally:
Expand All @@ -484,6 +496,21 @@ def main():
else:
key_name = cloud_path + get_key_name(root, path) + '/'

# Skip whole directory if matches exclude argument(s)
# (Still checks subdirectories, but saves a little time and verbosity.)
excludeDir = False
for excludePath in exclude:
if fnmatch(root+os.sep, excludePath):
excludeDir = True
break
if excludeDir:
if not quiet:
sys.stdout.write(
'Skipping %s (excluded path)\n' %
root
)
continue

if ignore_empty and not files:
if not quiet:
sys.stdout.write(
Expand All @@ -503,7 +530,7 @@ def main():
key_name.endswith('_$folder$'):
if not quiet:
sys.stdout.write(
'Skipping %s (size matches)\n' %
'Skipping %s (directory already exists)\n' %
key_name.replace('_$folder$', '/')
)
create_dirkey = False
Expand All @@ -529,7 +556,7 @@ def main():
reduced_redundancy=reduced,
encrypt_key=encrypt
)
keys[key_name] = 0
keys[key_name] = {'size': 0, 'modified': datetime.datetime.now()}

# Clean stdout
sys.stdout.write('\n')
Expand All @@ -542,6 +569,20 @@ def main():
key_name = cloud_path + get_key_name(fullpath, path)
file_size = os.path.getsize(fullpath)

# determine if the file should be excluded according to command line arguments.
excludeFile = False
for excludePath in exclude:
if fnmatch(fullpath, excludePath):
excludeFile = True
break
if excludeFile:
if not quiet:
sys.stdout.write(
'Skipping %s (excluded path)\n' %
fullpath[len(path):].lstrip(os.sep)
)
continue

if file_size == 0:
if ignore_empty:
if not quiet:
Expand All @@ -559,13 +600,22 @@ def main():
fullpath[len(path):].lstrip(os.sep)
)
continue
elif keys[key_name] == file_size:
if not quiet:
sys.stdout.write(
'Skipping %s (size matches)\n' %
fullpath[len(path):].lstrip(os.sep)
)
continue
elif keys[key_name]['size'] == file_size:
if size_only:
if not quiet:
sys.stdout.write(
'Skipping %s (size matches)\n' %
fullpath[len(path):].lstrip(os.sep)
)
continue
# Compare last modified
elif datetime.datetime.fromtimestamp(os.path.getmtime(fullpath)) <= keys[key_name]['modified']:
if not quiet:
sys.stdout.write(
'Skipping %s (not modified since last upload)\n' %
fullpath[len(path):].lstrip(os.sep)
)
continue

sys.stdout.write(
'%s\n' %
Expand All @@ -589,7 +639,7 @@ def main():
policy=grant, reduced_redundancy=reduced,
encrypt_key=encrypt
)
keys[key_name] = file_size
keys[key_name] = {'size': file_size, 'modified': datetime.datetime.now()}

# Clean stdout
sys.stdout.write('\n')
Expand Down Expand Up @@ -621,7 +671,7 @@ def main():
if key_name in keys:
del(keys[key_name])

for key_name, key_size in keys.iteritems():
for key_name, key_meta in keys.iteritems():
sys.stdout.write(
'deleting %s\n' %
key_name[len(cloud_path):].replace('_$folder$', '/')
Expand Down Expand Up @@ -664,16 +714,23 @@ def main():
sys.stdout.write('Skipping %s (not overwriting)\n' %
filename)
elif key.size == file_size:
copy_file = False
if not quiet:
if filename != key_name.split('/')[-1]:
sys.stdout.write(
'Skipping %s -> %s (size matches)\n' %
filename, key_name.split('/')[-1]
)
else:
sys.stdout.write('Skipping %s (size matches)\n' %
filename)
skip_reason = ''
if size_only:
skip_reason = 'size matches'
# Compare last modified
elif datetime.datetime.fromtimestamp(os.path.getmtime(path)) <= parse_ts(key.last_modified):
skip_reason = 'not modified since last upload'
if skip_reason != '':
copy_file = False
if not quiet:
if filename != key_name.split('/')[-1]:
sys.stdout.write(
'Skipping %s -> %s (%s)\n' %
(filename, key_name.split('/')[-1], skip_reason)
)
else:
sys.stdout.write('Skipping %s (%s)\n' %
(filename, skip_reason))

if copy_file:
if filename != key_name.split('/')[-1]:
Expand Down Expand Up @@ -767,19 +824,27 @@ def main():
)
copy_file = False
elif key.size == os.path.getsize(fullpath):
if not quiet:
if rename:
sys.stdout.write(
'Skipping %s -> %s (size matches)\n' %
keypath.replace('/', os.sep),
fullpath.split(os.sep)[-1]
)
else:
sys.stdout.write(
'Skipping %s (size matches)\n' %
fullpath.split(os.sep)[-1]
)
copy_file = False
skip_reason = ''
if size_only:
skip_reason = 'size matches'
# Compare last modified
elif parse_ts(key.last_modified) < datetime.datetime.fromtimestamp(os.path.getmtime(fullpath)):
skip_reason = 'local copy more recently modified'
if skip_reason != '':
if not quiet:
if rename:
sys.stdout.write(
'Skipping %s -> %s (%s)\n' %
(keypath.replace('/', os.sep),
fullpath.split(os.sep)[-1],
skip_reason)
)
else:
sys.stdout.write(
'Skipping %s (%s)\n' %
(fullpath.split(os.sep)[-1], skip_reason)
)
copy_file = False

if copy_file:
if rename:
Expand Down Expand Up @@ -856,12 +921,27 @@ def main():
fullpath[len(os.path.join(path, '')):]
)
continue
elif key.size == os.path.getsize(fullpath) or \
key.name.endswith('/') or \
elif key.size == os.path.getsize(fullpath):
if size_only:
if not quiet:
sys.stdout.write(
'Skipping %s (size matches)\n' %
fullpath[len(os.path.join(path, '')):]
)
continue
# Compare last modified
elif parse_ts(key.last_modified) < datetime.datetime.fromtimestamp(os.path.getmtime(fullpath)):
if not quiet:
sys.stdout.write(
'Skipping %s (modified since last upload)\n' %
fullpath[len(path):].lstrip(os.sep)
)
continue
elif key.name.endswith('/') or \
key.name.endswith('_$folder$'):
if not quiet:
sys.stdout.write(
'Skipping %s (size matches)\n' %
'Skipping %s (directory already exists)\n' %
fullpath[len(os.path.join(path, '')):]
)
continue
Expand Down Expand Up @@ -977,17 +1057,24 @@ def main():
)
copy_file = False
elif key.size == dest_key.size:
if not quiet:
if rename:
sys.stdout.write(
'Skipping %s -> %s (size matches)\n' %
keypath.split('/')[-1], fullpath.split('/')[-1]
)
else:
sys.stdout.write(
'Skipping %s (size matches)\n' % fullpath
)
copy_file = False
skip_reason = ''
if size_only:
skip_reason = 'size matches'
# Compare last modified
elif parse_ts(key.last_modified) < parse_ts(dest_key.last_modified):
skip_reason = 'destination more recently modified'
if skip_reason != '':
if not quiet:
if rename:
sys.stdout.write(
'Skipping %s -> %s (%s)\n' %
(keypath.split('/')[-1], fullpath.split('/')[-1], skip_reason)
)
else:
sys.stdout.write(
'Skipping %s (%s)\n' % (fullpath, skip_reason)
)
copy_file = False

if copy_file:
if rename:
Expand Down Expand Up @@ -1085,12 +1172,21 @@ def main():
)
continue
elif key.size == dest_key.size:
if not quiet:
sys.stdout.write(
'Skipping %s (size matches)\n' %
fullpath.replace('_$folder$', '/')
)
continue
if size_only:
if not quiet:
sys.stdout.write(
'Skipping %s (size matches)\n' %
fullpath.replace('_$folder$', '/')
)
continue
# Compare last modified
elif parse_ts(key.last_modified) < parse_ts(dest_key.last_modified):
if not quiet:
sys.stdout.write(
'Skipping %s (destination more recently modified)\n' %
fullpath.replace('_$folder$', '/')
)
continue

sys.stdout.write('%s... ' % keypath.replace('_$folder$', '/'))
sys.stdout.flush()
Expand Down