Move S3 objects to another bucket/prefix based on condition(s)

For one reason or another, you may want to move (or run some other operation on) a list of S3 objects. The following Python snippet moves objects from one prefix to another in the same bucket, either on a schedule if you trigger the Lambda function with an EventBridge cron rule, and/or when certain conditions are met. It uses Boto3 to interact with S3; feel free to adapt it to your needs.
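
If you use EventBridge as the cron, the schedule rule can also be created from code. Here is a minimal sketch, assuming a Lambda function named archive-s3-objects and a rule named archive-s3-objects-daily (both placeholder names), using Boto3's events and lambda clients:

import boto3

events_client = boto3.client("events")
lambda_client = boto3.client("lambda")

# Placeholder names, adapt them to your own function and rule
FUNCTION_NAME = "archive-s3-objects"
RULE_NAME = "archive-s3-objects-daily"

# Create (or update) a scheduled rule that fires once a day
rule = events_client.put_rule(Name=RULE_NAME, ScheduleExpression="rate(1 day)")

# Allow EventBridge to invoke the Lambda function
lambda_client.add_permission(
    FunctionName=FUNCTION_NAME,
    StatementId="AllowEventBridgeInvoke",
    Action="lambda:InvokeFunction",
    Principal="events.amazonaws.com",
    SourceArn=rule["RuleArn"],
)

# Attach the Lambda function as the rule's target
function_arn = lambda_client.get_function(FunctionName=FUNCTION_NAME)["Configuration"]["FunctionArn"]
events_client.put_targets(Rule=RULE_NAME, Targets=[{"Id": "1", "Arn": function_arn}])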

Environment variables that need to be added to the Lambda function (one way to set them from code is sketched after this list):

  • S3_BUCKET : the bucket to operate on
  • S3_PREFIX_DESTINATION : the destination prefix (ending with /)
  • S3_PREFIX_SOURCE : the source prefix (ending with /)
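
If you prefer to set them from code rather than the console, here is a minimal sketch using Boto3's lambda client (the function name and values are placeholders):

import boto3

lambda_client = boto3.client("lambda")

# Placeholder function name and example values, adapt them to your setup
lambda_client.update_function_configuration(
    FunctionName="archive-s3-objects",
    Environment={
        "Variables": {
            "S3_BUCKET": "my-bucket",
            "S3_PREFIX_SOURCE": "incoming/",
            "S3_PREFIX_DESTINATION": "archive/",
        }
    },
)

With those variables in place, the Lambda function itself looks like this:
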
import os
from datetime import datetime, timedelta, timezone

import boto3

S3_BUCKET = os.environ['S3_BUCKET']
S3_PREFIX_DESTINATION = os.environ['S3_PREFIX_DESTINATION']
S3_PREFIX_SOURCE = os.environ['S3_PREFIX_SOURCE']

s3_client = boto3.client("s3")

def lambda_handler(event, context):
    # List the objects under the source prefix
    # (list_objects_v2 returns at most 1,000 keys per call; see the paginated sketch after the snippet)
    response = s3_client.list_objects_v2(Bucket=S3_BUCKET, Prefix=S3_PREFIX_SOURCE)
    s3_objects = response.get("Contents", [])

    # Drop the placeholder entry that represents the prefix itself, if it exists
    s3_objects = [s3_object for s3_object in s3_objects if s3_object["Key"] != S3_PREFIX_SOURCE]

    print(f"{len(s3_objects)} objects to check")

    # Create a list of S3 objects to archive
    s3_objects_to_archive = list(filter(should_be_archived, s3_objects))

    total_archived_objects = len(s3_objects_to_archive)
    print(f"{total_archived_objects} objects to archive")

    # Archive/move objects
    for s3_object in s3_objects_to_archive:
        archive_file(s3_object)

    return total_archived_objects

def should_be_archived(s3_object):
    # Your logic code here. Example condition (an assumption, adapt it to your use case):
    # archive objects that have not been modified for 30 days.
    if s3_object["LastModified"] < datetime.now(timezone.utc) - timedelta(days=30):
        return True

    return False

def archive_file(s3_object):
    # Determine the new object key by swapping the source prefix for the destination one
    # (you could also nest it deeper, adding the current date to the prefix for example)
    s3_object_new_key = s3_object["Key"].replace(S3_PREFIX_SOURCE, S3_PREFIX_DESTINATION, 1)

    print(f"Archiving {s3_object['Key']} to {s3_object_new_key}")

    # Copy the object to its new prefix, then delete the original
    # (copy_object handles objects up to 5 GB; larger objects need a multipart copy, e.g. s3_client.copy)
    s3_client.copy_object(
        Bucket=S3_BUCKET,
        Key=s3_object_new_key,
        CopySource={"Bucket": S3_BUCKET, "Key": s3_object["Key"]},
    )
    s3_client.delete_object(Bucket=S3_BUCKET, Key=s3_object["Key"])
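
One caveat: list_objects_v2 returns at most 1,000 keys per call. If the source prefix can hold more objects than that, you can swap the listing step in lambda_handler for a paginated version. A minimal sketch (list_source_objects is a new helper, not part of the snippet above):

def list_source_objects():
    # Collect every object under the source prefix, 1,000 keys at a time
    paginator = s3_client.get_paginator("list_objects_v2")
    s3_objects = []

    for page in paginator.paginate(Bucket=S3_BUCKET, Prefix=S3_PREFIX_SOURCE):
        s3_objects.extend(page.get("Contents", []))

    # Drop the placeholder entry that represents the prefix itself, if it exists
    return [s3_object for s3_object in s3_objects if s3_object["Key"] != S3_PREFIX_SOURCE]

In lambda_handler, the listing step then becomes a single call: s3_objects = list_source_objects().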