Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementation of the restore command #9

Open
wants to merge 10 commits into base branch `master` from the source branch
94 changes: 91 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,94 @@
*.py[co]
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

/dist/
/docs/_build/
MANIFEST

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject

# Vim
*.swp
4 changes: 2 additions & 2 deletions django_archive/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
__title__ = 'django_archive'
__version__ = '0.1.6'
__author__ = 'Nathan Osman'
__version__ = '0.2.0'
__author__ = 'Nathan Osman, Adnn'
94 changes: 71 additions & 23 deletions django_archive/management/commands/archive.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from collections import OrderedDict
from datetime import datetime
from io import BytesIO
from json import dump
Expand All @@ -6,11 +7,14 @@

from django.apps.registry import apps
from django.conf import settings
from django.core.files.base import File
from django.core.management import call_command
from django.core.management.base import BaseCommand
from django.db import models
from django.utils.encoding import smart_bytes

from .utils import *

from ... import __version__


Expand Down Expand Up @@ -38,32 +42,55 @@ def write(self, data):
BytesIO.write(self, smart_bytes(data))


def walk_storage_files(storage, directory=""):
    """
    Recursively yield an open file object for every file under ``directory``
    in the given storage.

    Each yielded file's ``name`` attribute is forced to its path relative to
    the storage root (the media root). The caller is responsible for closing
    the yielded files.
    """
    directories, files = storage.listdir(directory)
    for filename in files:
        media_root_relative_path = path.join(directory, filename)
        storage_file = storage.open(media_root_relative_path, "rb")
        # Some storages (at least FileSystemStorage) do not provide the 'name'
        # argument to the File ctor on opening, which sets File.name to the
        # absolute path. Instead, it should be relative to the media root.
        storage_file.name = media_root_relative_path
        yield storage_file
    for subdirectory in directories:
        for storage_file in walk_storage_files(storage, path.join(directory, subdirectory)):
            yield storage_file


class Command(BaseCommand):
"""
Create a compressed archive of database tables and uploaded media.
Create an archive of database tables and uploaded media, potentially compressed.
"""

help = "Create a compressed archive of database tables and uploaded media."
help = "Create an archive of database tables and uploaded media, potentially compressed."

def handle(self, *args, **kwargs):
"""
Process the command.
"""

self.attr = AttributeRepository()

if not path.isdir(self.attr.get('ARCHIVE_DIRECTORY')):
self.stderr.write("Setting 'ARCHIVE_DIRECTORY' set to the non-existent directory '{}'."
.format(self.attr.get('ARCHIVE_DIRECTORY')))
exit(1)

with self._create_archive() as tar:
self._dump_db(tar)
self._dump_files(tar)
self._dump_meta(tar)
self.stdout.write("Backup completed.")
self.stdout.write("Backup completed to archive '{}'.".format(tar.name))


def _create_archive(self):
"""
Create the archive and return the TarFile.
"""
filename = getattr(settings, 'ARCHIVE_FILENAME', '%Y-%m-%d--%H-%M-%S')
fmt = getattr(settings, 'ARCHIVE_FORMAT', 'bz2')
filename = self.attr.get('ARCHIVE_FILENAME')
fmt = self.attr.get('ARCHIVE_FORMAT')
absolute_path = path.join(
getattr(settings, 'ARCHIVE_DIRECTORY', ''),
'%s.tar.%s' % (datetime.today().strftime(filename), fmt)
self.attr.get('ARCHIVE_DIRECTORY'),
'%s.tar%s' % (datetime.today().strftime(filename), '.'+fmt if fmt else '')
)
return TarFile.open(absolute_path, 'w:%s' % fmt)

Expand All @@ -72,23 +99,36 @@ def _dump_db(self, tar):
Dump the rows in each model to the archive.
"""

# Determine the list of models to exclude
exclude = getattr(settings, 'ARCHIVE_EXCLUDE', (
'auth.Permission',
'contenttypes.ContentType',
'sessions.Session',
))

# Dump the tables to a MixedIO
data = MixedIO()
call_command('dumpdata', all=True, format='json', exclude=exclude, stdout=data)
info = TarInfo('data.json')
call_command('dumpdata', all=True, format='json', indent=self.attr.get('ARCHIVE_DB_INDENT'),
exclude=self.attr.get('ARCHIVE_EXCLUDE'), stdout=data)
info = TarInfo(DB_DUMP)
info.size = data.rewind()
tar.addfile(info, data)

def _dump_files(self, tar):
if self.attr.get('ARCHIVE_MEDIA_POLICY') == 'all_files':
self._dump_all_files(tar)
elif self.attr.get('ARCHIVE_MEDIA_POLICY') == 'filefield_targets':
self._dump_referenced_files(tar)
elif self.attr.get('ARCHIVE_MEDIA_POLICY'):
self.stderr.write("Warning: ARCHIVE_MEDIA_POLICY value '{}' is not supported. Media files not archived."
.format(self.attr.get('ARCHIVE_MEDIA_POLICY')))

def _dump_all_files(self, tar):
"""
Dump all uploaded media to the archive.
Dump all media files found by the media storage class.
"""

media_storage = get_mediastorage()
for file in walk_storage_files(media_storage):
self._add_file(tar, file)
file.close()

def _dump_referenced_files(self, tar):
"""
Dump all media files that are reference by a FileField.
"""

# Loop through all models and find FileFields
Expand All @@ -106,18 +146,26 @@ def _dump_files(self, tar):
for field_name in field_names:
field = getattr(row, field_name)
if field:
field.open()
info = TarInfo(field.name)
info.size = field.size
tar.addfile(info, field)
self._add_file(tar, field)
field.close()

def _dump_meta(self, tar):
"""
Dump metadata to the archive.
"""
data = MixedIO()
dump({'version': __version__}, data)
info = TarInfo('meta.json')
meta_dict = OrderedDict((
('version', __version__),
('db_file', DB_DUMP),
('media_folder', MEDIA_DIR),
('settings', self.attr.settings_dict()),
))
dump(meta_dict, data, indent=2)
info = TarInfo(META_DUMP)
info.size = data.rewind()
tar.addfile(info, data)

def _add_file(self, tar, file):
info = TarInfo(path.join(MEDIA_DIR, file.name))
info.size = file.size
tar.addfile(info, file)
89 changes: 89 additions & 0 deletions django_archive/management/commands/restore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from .utils import *

from ... import __version__

from django.core.management import call_command
from django.core.management.base import BaseCommand

from json import load
from os import path
from tarfile import TarFile
import codecs, shutil, tempfile


class Command(BaseCommand):
    """
    Read an existing archive of database tables and uploaded media, and restore them as application data.
    """

    help = "Read an existing archive of database tables and uploaded media, and restore them as application data."

    def __init__(self, *args, **kwargs):
        super(Command, self).__init__(*args, **kwargs)
        self.attr = AttributeRepository()
        # Media files already copied into the media storage; used to produce
        # a recovery log when the restoration fails part-way through.
        self.stored_medias = []

    def add_arguments(self, parser):
        # Path to the archive; relative paths resolve against ARCHIVE_DIRECTORY.
        parser.add_argument('archive')

    @staticmethod
    def _version_tuple(version):
        """
        Convert a dotted version string into a tuple of ints for comparison.

        Plain string comparison mis-orders versions ('0.10.0' sorts below
        '0.9.0' lexicographically); numeric tuples compare component-wise.
        Non-numeric or missing components are treated as 0.
        """
        components = []
        for component in str(version).split('.'):
            try:
                components.append(int(component))
            except ValueError:
                components.append(0)
        return tuple(components)

    def handle(self, *args, **options):
        """
        Process the command.
        """
        tar = self._open_archive(options)
        try:
            self.meta_dict = self._load_meta(tar)
            # Refuse to restore archives produced by a newer version of the tool.
            if self._version_tuple(self.meta_dict.get('version')) > self._version_tuple(__version__):
                self.stderr.write("The archive version {} is superior to the command version {}: restoration aborted."
                                  .format(self.meta_dict.get('version'), __version__))
                exit(1)

            try:
                self._load_files(tar)
                self._load_db(tar)
                self.stdout.write("Restoration completed.")
            except Exception as e:
                self.stderr.write("Aborted restoration as this exception occurred: \n\t{}".format(e))
                if self.stored_medias:
                    logpath = self._log_stored_files(options)
                    self.stderr.write("The list of media files that were restored before failure was dumped to '{}'."
                                      .format(logpath))
        finally:
            # Close the archive even when the restoration aborts early
            # (e.g. on the version check above).
            tar.close()

    def _open_archive(self, options):
        """Open the tar archive named by the 'archive' command argument."""
        return TarFile.open(self.generated_filepath(options['archive']))

    def _load_meta(self, tar):
        """Read and deserialize the archive's metadata member."""
        # extractfile returns a readonly file-like object supporting read() of binary data, where json.load expects str.
        reader = codecs.getreader("utf-8")
        return load(reader(tar.extractfile(META_DUMP)))

    def _load_db(self, tar):
        """Restore the database from the fixture stored in the archive."""
        # Note: The loaddata command has quite advanced logic that should not be duplicated here; sadly this management
        # command can only get its input data from the filesystem, so we create a temporary file in order to use it.
        # NOTE(review): reopening an open NamedTemporaryFile by name fails on
        # Windows — confirm the supported platforms.
        db_element = self.meta_dict.get('db_file')
        with tempfile.NamedTemporaryFile(suffix=".json") as temporary_extracted:
            # -1 is given as a negative value disables "looping over the source data in chunks"
            shutil.copyfileobj(tar.extractfile(db_element), temporary_extracted, -1)
            # Flush the buffered data so loaddata (which reopens the file by
            # name) sees the complete dump instead of a truncated one.
            temporary_extracted.flush()
            call_command('loaddata', temporary_extracted.name)

    def _load_files(self, tar):
        """Extract every media member of the archive into the media storage."""
        media_storage = get_mediastorage()
        media_folder = self.meta_dict.get('media_folder')
        for media in [member for member in tar.getnames() if member.startswith(media_folder)]:
            original_name = path.relpath(media, media_folder)
            stored_name = media_storage.save(original_name, tar.extractfile(media))
            if original_name != stored_name:
                # The storage renamed the file (e.g. name collision): stored
                # references would no longer match, so undo and abort.
                media_storage.delete(stored_name)
                raise Exception("The media '{}' was saved under a different name '{}'.".format(original_name, stored_name))
            else:
                self.stored_medias.append(stored_name)

    def _log_stored_files(self, options):
        """Write the list of already-restored media files next to the archive and return the log path."""
        logpath = self.generated_filepath("{}.log".format(options['archive']))
        with open(logpath, "w") as log:
            log.write("Files written to media storage:\n\t{}".format("\n\t".join(self.stored_medias)))
        return logpath

    def generated_filepath(self, f_path):
        """Resolve f_path against ARCHIVE_DIRECTORY unless it is already absolute."""
        return f_path if path.isabs(f_path) else path.join(self.attr.get('ARCHIVE_DIRECTORY'), f_path)
32 changes: 32 additions & 0 deletions django_archive/management/commands/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from django.conf import settings
from django.core.files.storage import DefaultStorage


# Well-known member names inside a generated archive.
DB_DUMP = 'data.json'    # serialized database fixture (dumpdata output)
META_DUMP = 'meta.json'  # archive metadata: version, layout, settings
MEDIA_DIR = '_media'     # folder prefix under which media files are stored


def get_mediastorage():
    """Return the storage backend used for uploaded media files."""
    return DefaultStorage() # The default storage appears to be the one used for Media


class AttributeRepository(object):
    """
    Single access point for the django-archive settings.

    Each setting is read from the Django project settings, falling back to
    the value declared in ``defaults`` when the project does not override it.
    """

    defaults = {
        'ARCHIVE_DIRECTORY': '',
        'ARCHIVE_FILENAME': 'django-archive_%Y-%m-%d--%H-%M-%S',
        'ARCHIVE_FORMAT': 'bz2',
        'ARCHIVE_EXCLUDE': (
            'auth.Permission',
            'contenttypes.ContentType',
            'sessions.Session',
        ),
        'ARCHIVE_DB_INDENT': None,
        'ARCHIVE_MEDIA_POLICY': 'all_files', #possible values: 'all_files', 'filefield_targets'
    }

    def get(self, name):
        """Return the effective value of the setting *name*."""
        return getattr(settings, name, self.defaults[name])

    def settings_dict(self):
        """Return every known setting as a {name: effective value} dict."""
        return dict((name, self.get(name)) for name in self.defaults)
4 changes: 4 additions & 0 deletions pypi.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/sh
# Build and upload the package to PyPI.
# Prerequisite: install twine in an activated venv.

# Abort immediately on failure so a broken sdist is never uploaded.
set -e

python setup.py sdist
twine upload dist/*
Loading